From cb3f1ce3b03e961d2bdd71e0ea3df9eab101427d Mon Sep 17 00:00:00 2001 From: DPE bot Date: Tue, 20 Mar 2018 10:01:15 -0700 Subject: [PATCH 1/4] Auto-update dependencies. (#1060) * Auto-update dependencies. * Rollback failed updates. --- appengine-java8/analytics/pom.xml | 2 +- appengine-java8/bigquery/pom.xml | 6 +++--- appengine-java8/bigtable/pom.xml | 2 +- appengine-java8/cloudsql-postgres/pom.xml | 2 +- appengine-java8/datastore/pom.xml | 2 +- appengine-java8/guestbook-cloud-datastore/pom.xml | 2 +- appengine-java8/pubsub/pom.xml | 4 ++-- appengine-java8/spanner/pom.xml | 2 +- appengine-java8/taskqueues-deferred/pom.xml | 2 +- appengine-java8/translate-pubsub/pom.xml | 6 +++--- appengine/firebase-tictactoe/pom.xml | 2 +- appengine/taskqueue/defer-samples/pom.xml | 2 +- bigquery/cloud-client/pom.xml | 2 +- bigquery/datatransfer/cloud-client/pom.xml | 2 +- cloud-tasks/pom.xml | 2 +- compute/cmdline/pom.xml | 2 +- datastore/cloud-client/pom.xml | 2 +- datastore/pom.xml | 2 +- dlp/pom.xml | 2 +- errorreporting/pom.xml | 2 +- firestore/pom.xml | 2 +- flexible/cloud-tasks/pom.xml | 2 +- flexible/cloudstorage/pom.xml | 2 +- flexible/datastore/pom.xml | 2 +- flexible/errorreporting/pom.xml | 2 +- flexible/postgres/pom.xml | 2 +- flexible/pubsub/pom.xml | 4 ++-- flexible/sparkjava/pom.xml | 4 ++-- iap/pom.xml | 2 +- kms/pom.xml | 2 +- language/analysis/pom.xml | 2 +- language/cloud-client/pom.xml | 2 +- logging/cloud-client/pom.xml | 2 +- logging/jul/pom.xml | 2 +- logging/logback/pom.xml | 2 +- pubsub/cloud-client/pom.xml | 2 +- speech/cloud-client/pom.xml | 2 +- storage/cloud-client/pom.xml | 2 +- storage/json-api/pom.xml | 2 +- storage/storage-transfer/pom.xml | 2 +- storage/xml-api/cmdline-sample/pom.xml | 2 +- translate/cloud-client/pom.xml | 2 +- translate/pom.xml | 2 +- video/cloud-client/pom.xml | 2 +- vision/beta/cloud-client/pom.xml | 2 +- vision/cloud-client/pom.xml | 2 +- vision/face-detection/pom.xml | 2 +- vision/label/pom.xml | 2 +- 
vision/landmark-detection/pom.xml | 2 +- vision/text/pom.xml | 2 +- 50 files changed, 57 insertions(+), 57 deletions(-) diff --git a/appengine-java8/analytics/pom.xml b/appengine-java8/analytics/pom.xml index cb941f40bdb..232fafb19d6 100644 --- a/appengine-java8/analytics/pom.xml +++ b/appengine-java8/analytics/pom.xml @@ -87,7 +87,7 @@ org.mockito mockito-core - 2.15.0 + 2.16.0 test diff --git a/appengine-java8/bigquery/pom.xml b/appengine-java8/bigquery/pom.xml index 6d233cfc872..5a3bfdfd127 100644 --- a/appengine-java8/bigquery/pom.xml +++ b/appengine-java8/bigquery/pom.xml @@ -53,12 +53,12 @@ com.google.cloud google-cloud-bigquery - 0.37.0-beta + 1.22.0 com.google.cloud google-cloud-monitoring - 0.37.0-beta + 0.40.0-beta @@ -96,7 +96,7 @@ org.mockito mockito-core - 2.15.0 + 2.16.0 test diff --git a/appengine-java8/bigtable/pom.xml b/appengine-java8/bigtable/pom.xml index 9013f11de05..7ac24c65bc9 100644 --- a/appengine-java8/bigtable/pom.xml +++ b/appengine-java8/bigtable/pom.xml @@ -46,7 +46,7 @@ limitations under the License. 
com.google.cloud.bigtable bigtable-hbase-1.x - 1.1.2 + 1.2.0 diff --git a/appengine-java8/cloudsql-postgres/pom.xml b/appengine-java8/cloudsql-postgres/pom.xml index 5265f433bfc..5dacb3c40cb 100644 --- a/appengine-java8/cloudsql-postgres/pom.xml +++ b/appengine-java8/cloudsql-postgres/pom.xml @@ -72,7 +72,7 @@ org.postgresql postgresql - 42.2.1 + 42.2.2 diff --git a/appengine-java8/datastore/pom.xml b/appengine-java8/datastore/pom.xml index 4a19ab1477a..e8207b021ba 100644 --- a/appengine-java8/datastore/pom.xml +++ b/appengine-java8/datastore/pom.xml @@ -53,7 +53,7 @@ com.google.auto.value auto-value - 1.5.3 + 1.5.4 provided diff --git a/appengine-java8/guestbook-cloud-datastore/pom.xml b/appengine-java8/guestbook-cloud-datastore/pom.xml index f9cb164ebd2..aa6c4f67e1e 100644 --- a/appengine-java8/guestbook-cloud-datastore/pom.xml +++ b/appengine-java8/guestbook-cloud-datastore/pom.xml @@ -65,7 +65,7 @@ com.google.cloud google-cloud - 0.37.0-alpha + 0.40.0-alpha diff --git a/appengine-java8/pubsub/pom.xml b/appengine-java8/pubsub/pom.xml index 90f90ca90b7..156de88edec 100644 --- a/appengine-java8/pubsub/pom.xml +++ b/appengine-java8/pubsub/pom.xml @@ -52,12 +52,12 @@ com.google.cloud google-cloud-pubsub - 0.37.0-beta + 0.40.0-beta com.google.cloud google-cloud-datastore - 1.19.0 + 1.22.0 diff --git a/appengine-java8/spanner/pom.xml b/appengine-java8/spanner/pom.xml index 36a06c7eb62..dc61fae644f 100644 --- a/appengine-java8/spanner/pom.xml +++ b/appengine-java8/spanner/pom.xml @@ -44,7 +44,7 @@ com.google.cloud google-cloud-spanner - 0.37.0-beta + 0.40.0-beta javax.servlet diff --git a/appengine-java8/taskqueues-deferred/pom.xml b/appengine-java8/taskqueues-deferred/pom.xml index 9b3757a43b1..3b046eb990a 100644 --- a/appengine-java8/taskqueues-deferred/pom.xml +++ b/appengine-java8/taskqueues-deferred/pom.xml @@ -66,7 +66,7 @@ org.mockito mockito-core - 2.15.0 + 2.16.0 com.google.appengine diff --git a/appengine-java8/translate-pubsub/pom.xml 
b/appengine-java8/translate-pubsub/pom.xml index 60bc98e8097..41d8435952a 100644 --- a/appengine-java8/translate-pubsub/pom.xml +++ b/appengine-java8/translate-pubsub/pom.xml @@ -49,17 +49,17 @@ com.google.cloud google-cloud-pubsub - 0.37.0-beta + 0.40.0-beta com.google.cloud google-cloud-datastore - 1.19.0 + 1.22.0 com.google.cloud google-cloud-translate - 1.19.0 + 1.22.0 diff --git a/appengine/firebase-tictactoe/pom.xml b/appengine/firebase-tictactoe/pom.xml index df1568cd09c..ee483b2d1da 100644 --- a/appengine/firebase-tictactoe/pom.xml +++ b/appengine/firebase-tictactoe/pom.xml @@ -65,7 +65,7 @@ org.mockito mockito-core - 2.15.0 + 2.16.0 com.google.appengine diff --git a/appengine/taskqueue/defer-samples/pom.xml b/appengine/taskqueue/defer-samples/pom.xml index e5c065b4ee0..7ca5d6c027e 100644 --- a/appengine/taskqueue/defer-samples/pom.xml +++ b/appengine/taskqueue/defer-samples/pom.xml @@ -63,7 +63,7 @@ org.mockito mockito-core - 2.15.0 + 2.16.0 com.google.appengine diff --git a/bigquery/cloud-client/pom.xml b/bigquery/cloud-client/pom.xml index 190bfa161d9..06221e40dc5 100644 --- a/bigquery/cloud-client/pom.xml +++ b/bigquery/cloud-client/pom.xml @@ -40,7 +40,7 @@ com.google.cloud google-cloud-bigquery - 0.37.0-beta + 1.22.0 diff --git a/bigquery/datatransfer/cloud-client/pom.xml b/bigquery/datatransfer/cloud-client/pom.xml index d1824ff8cf6..686ed2f33c9 100644 --- a/bigquery/datatransfer/cloud-client/pom.xml +++ b/bigquery/datatransfer/cloud-client/pom.xml @@ -40,7 +40,7 @@ com.google.cloud google-cloud-bigquerydatatransfer - 0.37.0-beta + 0.40.0-beta diff --git a/cloud-tasks/pom.xml b/cloud-tasks/pom.xml index bf08b31d66e..71ea7c5ccb2 100644 --- a/cloud-tasks/pom.xml +++ b/cloud-tasks/pom.xml @@ -39,7 +39,7 @@ com.google.apis google-api-services-cloudtasks - v2beta2-rev37-1.23.0 + v2beta2-rev39-1.23.0 com.google.api-client diff --git a/compute/cmdline/pom.xml b/compute/cmdline/pom.xml index 0a6bb54cdbb..f6c5d65cd20 100644 --- a/compute/cmdline/pom.xml +++ 
b/compute/cmdline/pom.xml @@ -32,7 +32,7 @@ limitations under the License. 1.8 1.8 - v1-rev169-1.23.0 + v1-rev171-1.23.0 UTF-8 diff --git a/datastore/cloud-client/pom.xml b/datastore/cloud-client/pom.xml index 0f0e8dd76cb..189a2fddbd2 100644 --- a/datastore/cloud-client/pom.xml +++ b/datastore/cloud-client/pom.xml @@ -39,7 +39,7 @@ com.google.cloud google-cloud-datastore - 1.19.0 + 1.22.0 diff --git a/datastore/pom.xml b/datastore/pom.xml index f70964b03e7..17c618c2616 100644 --- a/datastore/pom.xml +++ b/datastore/pom.xml @@ -44,7 +44,7 @@ com.google.cloud google-cloud-datastore - 1.19.0 + 1.22.0 diff --git a/dlp/pom.xml b/dlp/pom.xml index 7f89ff03d6c..3999c2fc065 100644 --- a/dlp/pom.xml +++ b/dlp/pom.xml @@ -44,7 +44,7 @@ com.google.cloud google-cloud-dlp - 0.37.0-beta + 0.40.0-beta diff --git a/errorreporting/pom.xml b/errorreporting/pom.xml index 87c6cecd48a..aa8381161e3 100644 --- a/errorreporting/pom.xml +++ b/errorreporting/pom.xml @@ -38,7 +38,7 @@ limitations under the License. 
com.google.cloud google-cloud-errorreporting - 0.37.0-beta + 0.40.0-beta diff --git a/firestore/pom.xml b/firestore/pom.xml index dc56cb6e6e4..55b0b67218f 100644 --- a/firestore/pom.xml +++ b/firestore/pom.xml @@ -45,7 +45,7 @@ com.google.cloud google-cloud-firestore - 0.37.0-beta + 0.40.0-beta diff --git a/flexible/cloud-tasks/pom.xml b/flexible/cloud-tasks/pom.xml index 6fa6f45fbdf..bd3d4c89d2b 100644 --- a/flexible/cloud-tasks/pom.xml +++ b/flexible/cloud-tasks/pom.xml @@ -46,7 +46,7 @@ com.google.apis google-api-services-cloudtasks - v2beta2-rev37-1.23.0 + v2beta2-rev39-1.23.0 com.google.appengine diff --git a/flexible/cloudstorage/pom.xml b/flexible/cloudstorage/pom.xml index d6eb0ba0877..acaa2ee82a7 100644 --- a/flexible/cloudstorage/pom.xml +++ b/flexible/cloudstorage/pom.xml @@ -52,7 +52,7 @@ com.google.cloud google-cloud-storage - 1.19.0 + 1.22.0 diff --git a/flexible/datastore/pom.xml b/flexible/datastore/pom.xml index 6f1b8f570e2..c098e3273c6 100644 --- a/flexible/datastore/pom.xml +++ b/flexible/datastore/pom.xml @@ -49,7 +49,7 @@ com.google.cloud google-cloud-datastore - 1.19.0 + 1.22.0 diff --git a/flexible/errorreporting/pom.xml b/flexible/errorreporting/pom.xml index 8e71fbf36b0..ae31e12f3f8 100644 --- a/flexible/errorreporting/pom.xml +++ b/flexible/errorreporting/pom.xml @@ -50,7 +50,7 @@ com.google.cloud google-cloud-errorreporting - 0.37.0-beta + 0.40.0-beta diff --git a/flexible/postgres/pom.xml b/flexible/postgres/pom.xml index 188b7e82ffc..20f57f28542 100644 --- a/flexible/postgres/pom.xml +++ b/flexible/postgres/pom.xml @@ -79,7 +79,7 @@ org.postgresql postgresql - 42.2.1 + 42.2.2 diff --git a/flexible/pubsub/pom.xml b/flexible/pubsub/pom.xml index 73eab429950..e23428fa23d 100644 --- a/flexible/pubsub/pom.xml +++ b/flexible/pubsub/pom.xml @@ -66,12 +66,12 @@ com.google.cloud google-cloud-pubsub - 0.37.0-beta + 0.40.0-beta com.google.cloud google-cloud-datastore - 1.19.0 + 1.22.0 diff --git a/flexible/sparkjava/pom.xml 
b/flexible/sparkjava/pom.xml index b3a8b14e54b..a30b8a8a5e0 100644 --- a/flexible/sparkjava/pom.xml +++ b/flexible/sparkjava/pom.xml @@ -44,7 +44,7 @@ limitations under the License. com.sparkjava spark-core - 2.7.1 + 2.7.2 org.slf4j @@ -64,7 +64,7 @@ limitations under the License. com.google.cloud google-cloud-datastore - 1.19.0 + 1.22.0 diff --git a/iap/pom.xml b/iap/pom.xml index 9340c8111e9..60c6137360b 100644 --- a/iap/pom.xml +++ b/iap/pom.xml @@ -63,7 +63,7 @@ com.nimbusds nimbus-jose-jwt - 5.5 + 5.7 diff --git a/kms/pom.xml b/kms/pom.xml index eb818ca79ad..90e10b9aaf8 100644 --- a/kms/pom.xml +++ b/kms/pom.xml @@ -25,7 +25,7 @@ com.google.apis google-api-services-cloudkms - v1-rev35-1.23.0 + v1-rev40-1.23.0 com.google.guava diff --git a/language/analysis/pom.xml b/language/analysis/pom.xml index a4a072740c6..2dd30d892ae 100644 --- a/language/analysis/pom.xml +++ b/language/analysis/pom.xml @@ -39,7 +39,7 @@ limitations under the License. com.google.cloud google-cloud-language - 1.19.0 + 1.22.0 com.google.guava diff --git a/language/cloud-client/pom.xml b/language/cloud-client/pom.xml index 7054f67b252..4c4d28a0e76 100644 --- a/language/cloud-client/pom.xml +++ b/language/cloud-client/pom.xml @@ -39,7 +39,7 @@ com.google.cloud google-cloud-language - 1.19.0 + 1.22.0 com.google.guava diff --git a/logging/cloud-client/pom.xml b/logging/cloud-client/pom.xml index 8df32c32f68..2d1ef2d8b7a 100644 --- a/logging/cloud-client/pom.xml +++ b/logging/cloud-client/pom.xml @@ -39,7 +39,7 @@ com.google.cloud google-cloud-logging - 1.19.0 + 1.22.0 diff --git a/logging/jul/pom.xml b/logging/jul/pom.xml index 52185eadfed..af6572c0152 100644 --- a/logging/jul/pom.xml +++ b/logging/jul/pom.xml @@ -39,7 +39,7 @@ com.google.cloud google-cloud-logging - 1.19.0 + 1.22.0 diff --git a/logging/logback/pom.xml b/logging/logback/pom.xml index 4074bee3517..1d42bfb0719 100644 --- a/logging/logback/pom.xml +++ b/logging/logback/pom.xml @@ -39,7 +39,7 @@ com.google.cloud 
google-cloud-logging-logback - 0.37.0-alpha + 0.40.0-alpha diff --git a/pubsub/cloud-client/pom.xml b/pubsub/cloud-client/pom.xml index 3e5d0d60009..9c38a1b0a80 100644 --- a/pubsub/cloud-client/pom.xml +++ b/pubsub/cloud-client/pom.xml @@ -39,7 +39,7 @@ com.google.cloud google-cloud-pubsub - 0.37.0-beta + 0.40.0-beta diff --git a/speech/cloud-client/pom.xml b/speech/cloud-client/pom.xml index 730fd6061b5..66a0451a532 100644 --- a/speech/cloud-client/pom.xml +++ b/speech/cloud-client/pom.xml @@ -40,7 +40,7 @@ com.google.cloud google-cloud-speech - 0.37.0-alpha + 0.40.0-alpha diff --git a/storage/cloud-client/pom.xml b/storage/cloud-client/pom.xml index 37e05be0061..e5569b3a6de 100644 --- a/storage/cloud-client/pom.xml +++ b/storage/cloud-client/pom.xml @@ -39,7 +39,7 @@ com.google.cloud google-cloud-storage - 1.19.0 + 1.22.0 diff --git a/storage/json-api/pom.xml b/storage/json-api/pom.xml index c380c45d863..7fb984b8181 100644 --- a/storage/json-api/pom.xml +++ b/storage/json-api/pom.xml @@ -41,7 +41,7 @@ com.google.apis google-api-services-storage - v1-rev120-1.23.0 + v1-rev122-1.23.0 com.google.guava diff --git a/storage/storage-transfer/pom.xml b/storage/storage-transfer/pom.xml index bf2ddf99955..045da93688d 100644 --- a/storage/storage-transfer/pom.xml +++ b/storage/storage-transfer/pom.xml @@ -43,7 +43,7 @@ com.google.apis google-api-services-storagetransfer - v1-rev50-1.23.0 + v1-rev51-1.23.0 com.google.guava diff --git a/storage/xml-api/cmdline-sample/pom.xml b/storage/xml-api/cmdline-sample/pom.xml index 35f4574b827..53c48b471cd 100644 --- a/storage/xml-api/cmdline-sample/pom.xml +++ b/storage/xml-api/cmdline-sample/pom.xml @@ -70,7 +70,7 @@ com.google.apis google-api-services-storage - v1-rev120-1.23.0 + v1-rev122-1.23.0 com.google.guava diff --git a/translate/cloud-client/pom.xml b/translate/cloud-client/pom.xml index ef09d3288b0..3102a59f4f1 100644 --- a/translate/cloud-client/pom.xml +++ b/translate/cloud-client/pom.xml @@ -40,7 +40,7 @@ com.google.cloud 
google-cloud-translate - 1.19.0 + 1.22.0 diff --git a/translate/pom.xml b/translate/pom.xml index 1328c476618..4d330ae182a 100644 --- a/translate/pom.xml +++ b/translate/pom.xml @@ -41,7 +41,7 @@ limitations under the License. com.google.cloud google-cloud-translate - 1.19.0 + 1.22.0 junit diff --git a/video/cloud-client/pom.xml b/video/cloud-client/pom.xml index cc8f3cffeed..45ce571cc09 100644 --- a/video/cloud-client/pom.xml +++ b/video/cloud-client/pom.xml @@ -45,7 +45,7 @@ com.google.cloud google-cloud-video-intelligence - 0.37.0-beta + 0.40.0-beta diff --git a/vision/beta/cloud-client/pom.xml b/vision/beta/cloud-client/pom.xml index c0d9e8e893d..432a81a4ac5 100644 --- a/vision/beta/cloud-client/pom.xml +++ b/vision/beta/cloud-client/pom.xml @@ -40,7 +40,7 @@ com.google.cloud google-cloud-vision - 1.19.0 + 1.22.0 diff --git a/vision/cloud-client/pom.xml b/vision/cloud-client/pom.xml index a6bbfc47835..ac0fa725de1 100644 --- a/vision/cloud-client/pom.xml +++ b/vision/cloud-client/pom.xml @@ -40,7 +40,7 @@ com.google.cloud google-cloud-vision - 1.19.0 + 1.22.0 diff --git a/vision/face-detection/pom.xml b/vision/face-detection/pom.xml index 391b21187d0..196933ad8a9 100644 --- a/vision/face-detection/pom.xml +++ b/vision/face-detection/pom.xml @@ -41,7 +41,7 @@ com.google.apis google-api-services-vision - v1-rev371-1.23.0 + v1-rev373-1.23.0 com.google.api-client diff --git a/vision/label/pom.xml b/vision/label/pom.xml index 129a117edfb..8b7508953a3 100644 --- a/vision/label/pom.xml +++ b/vision/label/pom.xml @@ -40,7 +40,7 @@ com.google.apis google-api-services-vision - v1-rev371-1.23.0 + v1-rev373-1.23.0 com.google.api-client diff --git a/vision/landmark-detection/pom.xml b/vision/landmark-detection/pom.xml index a5daa8da4e4..a6e3cdd95e4 100644 --- a/vision/landmark-detection/pom.xml +++ b/vision/landmark-detection/pom.xml @@ -40,7 +40,7 @@ com.google.apis google-api-services-vision - v1-rev371-1.23.0 + v1-rev373-1.23.0 com.google.api-client diff --git 
a/vision/text/pom.xml b/vision/text/pom.xml index a4bd8ef2827..19f0c7af4ad 100644 --- a/vision/text/pom.xml +++ b/vision/text/pom.xml @@ -40,7 +40,7 @@ com.google.apis google-api-services-vision - v1-rev371-1.23.0 + v1-rev373-1.23.0 com.google.api-client From d332e2e35e63f19e7b64982690ab31ea4897a82f Mon Sep 17 00:00:00 2001 From: Jisha Abubaker Date: Tue, 20 Mar 2018 13:25:24 -0700 Subject: [PATCH 2/4] DLP => v2 (#1056) * DLP => v2 (WIP) Pending tasks: -> Update / Add Tests -> Region tag / comment review -> Submit for code review + fixes -> Merge once google-cloud-java PR : https://github.com/GoogleCloudPlatform/google-cloud-java/pull/2958 is released * Update to most recent versioning. * Updated DeIdentification samples and tests. * Revert pubsub to public version. * Fix Inspect samples/tests (minus pubsub). * Updated Jobs and add tests. * Updated Metadata classes. * Updated QuickStart tests and samples. * Updated Redact samples and tests. * Updated RiskAnalysis. * Update Template samples. * Update trigger tests. * Make Checkstyle Happy Again. * Fix (and ignore) tests using pubsub. * Update PR tests to complete all tests before returning results. (#1065) * Return results of all tests. * Use for loop instead of while. 
* WIP: Address PR feedback, part 1 * Update deps * Address PR feedback * Remove mvn clean verify failure * Add ReID FPE sample * Address PR feedback * Add k-map sample * checkstyle fixes --- dlp/pom.xml | 52 +- .../com/example/dlp/DeIdentification.java | 532 +++++++++-- .../main/java/com/example/dlp/Inspect.java | 683 ++++++++----- dlp/src/main/java/com/example/dlp/Jobs.java | 147 +++ .../main/java/com/example/dlp/Metadata.java | 61 +- .../main/java/com/example/dlp/QuickStart.java | 69 +- dlp/src/main/java/com/example/dlp/Redact.java | 207 ++-- .../java/com/example/dlp/RiskAnalysis.java | 896 +++++++++++++----- .../main/java/com/example/dlp/Templates.java | 263 +++++ .../main/java/com/example/dlp/Triggers.java | 299 ++++++ .../com/example/dlp/DeIdentificationIT.java | 106 ++- .../test/java/com/example/dlp/InspectIT.java | 104 +- dlp/src/test/java/com/example/dlp/JobsIT.java | 85 ++ .../test/java/com/example/dlp/MetadataIT.java | 27 +- .../java/com/example/dlp/QuickStartIT.java | 11 +- .../test/java/com/example/dlp/RedactIT.java | 53 +- .../java/com/example/dlp/RiskAnalysisIT.java | 136 ++- .../java/com/example/dlp/TemplatesIT.java | 93 ++ .../test/java/com/example/dlp/TriggersIT.java | 106 +++ dlp/src/test/resources/dates.csv | 5 + dlp/src/test/resources/results.correct.csv | 5 + 21 files changed, 3010 insertions(+), 930 deletions(-) create mode 100644 dlp/src/main/java/com/example/dlp/Jobs.java create mode 100644 dlp/src/main/java/com/example/dlp/Templates.java create mode 100644 dlp/src/main/java/com/example/dlp/Triggers.java create mode 100644 dlp/src/test/java/com/example/dlp/JobsIT.java create mode 100644 dlp/src/test/java/com/example/dlp/TemplatesIT.java create mode 100644 dlp/src/test/java/com/example/dlp/TriggersIT.java create mode 100644 dlp/src/test/resources/dates.csv create mode 100644 dlp/src/test/resources/results.correct.csv diff --git a/dlp/pom.xml b/dlp/pom.xml index 3999c2fc065..9108bd49f4e 100644 --- a/dlp/pom.xml +++ b/dlp/pom.xml @@ -30,6 +30,7 
@@ com.google.cloud.samples shared-configuration 1.0.8 + @@ -40,13 +41,16 @@ - com.google.cloud google-cloud-dlp 0.40.0-beta - + + com.google.cloud + google-cloud-pubsub + 0.40.0-beta + commons-cli commons-cli @@ -60,27 +64,27 @@ - - - - maven-assembly-plugin - 3.0.0 - - - jar-with-dependencies - - - - - make-assembly - package - - single - - - - - - + + + + maven-assembly-plugin + 3.0.0 + + + jar-with-dependencies + + + + + make-assembly + package + + single + + + + + + diff --git a/dlp/src/main/java/com/example/dlp/DeIdentification.java b/dlp/src/main/java/com/example/dlp/DeIdentification.java index 0e98bbf2041..8ffa46bc0e9 100644 --- a/dlp/src/main/java/com/example/dlp/DeIdentification.java +++ b/dlp/src/main/java/com/example/dlp/DeIdentification.java @@ -16,21 +16,49 @@ package com.example.dlp; -import com.google.cloud.dlp.v2beta1.DlpServiceClient; +import com.google.cloud.ServiceOptions; +import com.google.cloud.dlp.v2.DlpServiceClient; import com.google.common.io.BaseEncoding; -import com.google.privacy.dlp.v2beta1.CharacterMaskConfig; -import com.google.privacy.dlp.v2beta1.ContentItem; -import com.google.privacy.dlp.v2beta1.CryptoKey; -import com.google.privacy.dlp.v2beta1.CryptoReplaceFfxFpeConfig; -import com.google.privacy.dlp.v2beta1.CryptoReplaceFfxFpeConfig.FfxCommonNativeAlphabet; -import com.google.privacy.dlp.v2beta1.DeidentifyConfig; -import com.google.privacy.dlp.v2beta1.DeidentifyContentRequest; -import com.google.privacy.dlp.v2beta1.DeidentifyContentResponse; -import com.google.privacy.dlp.v2beta1.InfoTypeTransformations; -import com.google.privacy.dlp.v2beta1.InfoTypeTransformations.InfoTypeTransformation; -import com.google.privacy.dlp.v2beta1.KmsWrappedCryptoKey; -import com.google.privacy.dlp.v2beta1.PrimitiveTransformation; +import com.google.privacy.dlp.v2.CharacterMaskConfig; +import com.google.privacy.dlp.v2.ContentItem; +import com.google.privacy.dlp.v2.CryptoKey; +import com.google.privacy.dlp.v2.CryptoReplaceFfxFpeConfig; +import 
com.google.privacy.dlp.v2.CryptoReplaceFfxFpeConfig.FfxCommonNativeAlphabet; +import com.google.privacy.dlp.v2.CustomInfoType; +import com.google.privacy.dlp.v2.CustomInfoType.SurrogateType; +import com.google.privacy.dlp.v2.DateShiftConfig; +import com.google.privacy.dlp.v2.DeidentifyConfig; +import com.google.privacy.dlp.v2.DeidentifyContentRequest; +import com.google.privacy.dlp.v2.DeidentifyContentResponse; +import com.google.privacy.dlp.v2.FieldId; +import com.google.privacy.dlp.v2.FieldTransformation; +import com.google.privacy.dlp.v2.InfoType; +import com.google.privacy.dlp.v2.InfoTypeTransformations; +import com.google.privacy.dlp.v2.InfoTypeTransformations.InfoTypeTransformation; +import com.google.privacy.dlp.v2.InspectConfig; +import com.google.privacy.dlp.v2.KmsWrappedCryptoKey; +import com.google.privacy.dlp.v2.PrimitiveTransformation; +import com.google.privacy.dlp.v2.ProjectName; +import com.google.privacy.dlp.v2.RecordTransformations; +import com.google.privacy.dlp.v2.ReidentifyContentRequest; +import com.google.privacy.dlp.v2.ReidentifyContentResponse; +import com.google.privacy.dlp.v2.Table; +import com.google.privacy.dlp.v2.Value; import com.google.protobuf.ByteString; +import com.google.type.Date; +import java.io.BufferedReader; +import java.io.BufferedWriter; +import java.io.File; +import java.io.FileReader; +import java.io.FileWriter; +import java.nio.file.Path; +import java.nio.file.Paths; +import java.time.LocalDate; +import java.time.format.DateTimeParseException; +import java.util.ArrayList; +import java.util.Arrays; +import java.util.List; +import java.util.stream.Collectors; import org.apache.commons.cli.CommandLine; import org.apache.commons.cli.CommandLineParser; import org.apache.commons.cli.DefaultParser; @@ -42,31 +70,23 @@ public class DeIdentification { + // [START dlp_deidentify_masking] + /** + * Deidentify a string by masking sensitive information with a character using the DLP API. 
+ * + * @param string The string to deidentify. + * @param maskingCharacter (Optional) The character to mask sensitive data with. + * @param numberToMask (Optional) The number of characters' worth of sensitive data to mask. + * Omitting this value or setting it to 0 masks all sensitive chars. + * @param projectId ID of Google Cloud project to run the API under. + */ private static void deIdentifyWithMask( - String string, - Character maskingCharacter, - int numberToMask) { - // [START dlp_deidentify_masking] - /** - * Deidentify a string by masking sensitive information with a character using the DLP API. - * @param string The string to deidentify. - * @param maskingCharacter (Optional) The character to mask sensitive data with. - * @param numberToMask (Optional) The number of characters' worth of sensitive data to mask. - * Omitting this value or setting it to 0 masks all sensitive chars. - */ + String string, Character maskingCharacter, int numberToMask, String projectId) { // instantiate a client try (DlpServiceClient dlpServiceClient = DlpServiceClient.create()) { - // string = "My SSN is 372819127"; - // numberToMask = 5; - // maskingCharacter = 'x'; - - ContentItem contentItem = - ContentItem.newBuilder() - .setType("text/plain") - .setValue(string) - .build(); + ContentItem contentItem = ContentItem.newBuilder().setValue(string).build(); CharacterMaskConfig characterMaskConfig = CharacterMaskConfig.newBuilder() @@ -76,9 +96,7 @@ private static void deIdentifyWithMask( // Create the deidentification transformation configuration PrimitiveTransformation primitiveTransformation = - PrimitiveTransformation.newBuilder() - .setCharacterMaskConfig(characterMaskConfig) - .build(); + PrimitiveTransformation.newBuilder().setCharacterMaskConfig(characterMaskConfig).build(); InfoTypeTransformation infoTypeTransformationObject = InfoTypeTransformation.newBuilder() @@ -90,16 +108,17 @@ private static void deIdentifyWithMask( 
.addTransformations(infoTypeTransformationObject) .build(); - // Create the deidentification request object DeidentifyConfig deidentifyConfig = DeidentifyConfig.newBuilder() .setInfoTypeTransformations(infoTypeTransformationArray) .build(); + // Create the deidentification request object DeidentifyContentRequest request = DeidentifyContentRequest.newBuilder() + .setParent(ProjectName.of(projectId).toString()) .setDeidentifyConfig(deidentifyConfig) - .addItems(contentItem) + .setItem(contentItem) .build(); // Execute the deidentification request @@ -107,40 +126,35 @@ private static void deIdentifyWithMask( // Print the character-masked input value // e.g. "My SSN is 123456789" --> "My SSN is *********" - for (ContentItem item : response.getItemsList()) { - System.out.println(item.getValue()); - } + String result = response.getItem().getValue(); + System.out.println(result); } catch (Exception e) { System.out.println("Error in deidentifyWithMask: " + e.getMessage()); } - // [END dlp_deidentify_masking] } + // [END dlp_deidentify_masking] + // [START dlp_deidentify_fpe] + /** + * Deidentify a string by encrypting sensitive information while preserving format. + * + * @param string The string to deidentify. + * @param alphabet The set of characters to use when encrypting the input. For more information, + * see cloud.google.com/dlp/docs/reference/rest/v2/content/deidentify + * @param keyName The name of the Cloud KMS key to use when decrypting the wrapped key. + * @param wrappedKey The encrypted (or "wrapped") AES-256 encryption key. + * @param projectId ID of Google Cloud project to run the API under. + */ private static void deIdentifyWithFpe( - String string, FfxCommonNativeAlphabet alphabet, String keyName, String wrappedKey) { - // [START dlp_deidentify_fpe] - /** - * Deidentify a string by encrypting sensitive information while preserving format. - * @param string The string to deidentify. - * @param alphabet The set of characters to use when encrypting the input. 
For more information, - * see cloud.google.com/dlp/docs/reference/rest/v2beta1/content/deidentify - * @param keyName The name of the Cloud KMS key to use when decrypting the wrapped key. - * @param wrappedKey The encrypted (or "wrapped") AES-256 encryption key. - */ - + String string, + FfxCommonNativeAlphabet alphabet, + String keyName, + String wrappedKey, + String projectId, + String surrogateType) { // instantiate a client try (DlpServiceClient dlpServiceClient = DlpServiceClient.create()) { - - // string = "My SSN is 372819127"; - // alphabet = FfxCommonNativeAlphabet.ALPHA_NUMERIC; - // keyName = "projects/GCP_PROJECT/locations/REGION/keyRings/KEYRING_ID/cryptoKeys/KEY_NAME"; - // wrappedKey = "YOUR_ENCRYPTED_AES_256_KEY" - - ContentItem contentItem = - ContentItem.newBuilder() - .setType("text/plain") - .setValue(string) - .build(); + ContentItem contentItem = ContentItem.newBuilder().setValue(string).build(); // Create the format-preserving encryption (FPE) configuration KmsWrappedCryptoKey kmsWrappedCryptoKey = @@ -149,15 +163,13 @@ private static void deIdentifyWithFpe( .setCryptoKeyName(keyName) .build(); - CryptoKey cryptoKey = - CryptoKey.newBuilder() - .setKmsWrapped(kmsWrappedCryptoKey) - .build(); + CryptoKey cryptoKey = CryptoKey.newBuilder().setKmsWrapped(kmsWrappedCryptoKey).build(); CryptoReplaceFfxFpeConfig cryptoReplaceFfxFpeConfig = CryptoReplaceFfxFpeConfig.newBuilder() .setCryptoKey(cryptoKey) .setCommonAlphabet(alphabet) + .setSurrogateInfoType(InfoType.newBuilder().setName(surrogateType).build()) .build(); // Create the deidentification transformation configuration @@ -184,8 +196,9 @@ private static void deIdentifyWithFpe( DeidentifyContentRequest request = DeidentifyContentRequest.newBuilder() + .setParent(ProjectName.of(projectId).toString()) .setDeidentifyConfig(deidentifyConfig) - .addItems(contentItem) + .setItem(contentItem) .build(); // Execute the deidentification request @@ -193,30 +206,310 @@ private static void 
deIdentifyWithFpe( // Print the deidentified input value // e.g. "My SSN is 123456789" --> "My SSN is 7261298621" - for (ContentItem item : response.getItemsList()) { - System.out.println(item.getValue()); - } + String result = response.getItem().getValue(); + System.out.println(result); } catch (Exception e) { System.out.println("Error in deidentifyWithFpe: " + e.getMessage()); } - // [END dlp_deidentify_fpe] } + // [END dlp_deidentify_fpe] + + // [START dlp_reidentify_fpe] + /** + * Reidentify a string by decrypting sensitive information that was encrypted while preserving format. + * + * @param string The string to reidentify. + * @param alphabet The set of characters used when encrypting the input. For more information, see + * cloud.google.com/dlp/docs/reference/rest/v2/content/deidentify + * @param keyName The name of the Cloud KMS key to use when decrypting the wrapped key. + * @param wrappedKey The encrypted (or "wrapped") AES-256 encryption key. + * @param projectId ID of Google Cloud project to run the API under. + * @param surrogateType The name of the surrogate custom info type used during the encryption + * process. 
+ */ + private static void reIdentifyWithFpe( + String string, + FfxCommonNativeAlphabet alphabet, + String keyName, + String wrappedKey, + String projectId, + String surrogateType) { + // instantiate a client + try (DlpServiceClient dlpServiceClient = DlpServiceClient.create()) { + ContentItem contentItem = ContentItem.newBuilder().setValue(string).build(); + + InfoType surrogateTypeObject = InfoType.newBuilder().setName(surrogateType).build(); + + // Create the format-preserving encryption (FPE) configuration + KmsWrappedCryptoKey kmsWrappedCryptoKey = + KmsWrappedCryptoKey.newBuilder() + .setWrappedKey(ByteString.copyFrom(BaseEncoding.base64().decode(wrappedKey))) + .setCryptoKeyName(keyName) + .build(); + + CryptoKey cryptoKey = CryptoKey.newBuilder().setKmsWrapped(kmsWrappedCryptoKey).build(); + + CryptoReplaceFfxFpeConfig cryptoReplaceFfxFpeConfig = + CryptoReplaceFfxFpeConfig.newBuilder() + .setCryptoKey(cryptoKey) + .setCommonAlphabet(alphabet) + .setSurrogateInfoType(surrogateTypeObject) + .build(); + + // Create the deidentification transformation configuration + PrimitiveTransformation primitiveTransformation = + PrimitiveTransformation.newBuilder() + .setCryptoReplaceFfxFpeConfig(cryptoReplaceFfxFpeConfig) + .build(); + + InfoTypeTransformation infoTypeTransformationObject = + InfoTypeTransformation.newBuilder() + .setPrimitiveTransformation(primitiveTransformation) + .addInfoTypes(surrogateTypeObject) + .build(); + + InfoTypeTransformations infoTypeTransformationArray = + InfoTypeTransformations.newBuilder() + .addTransformations(infoTypeTransformationObject) + .build(); + + // Create the inspection config + CustomInfoType customInfoType = + CustomInfoType.newBuilder() + .setInfoType(surrogateTypeObject) + .setSurrogateType(SurrogateType.newBuilder().build()) + .build(); + + InspectConfig inspectConfig = + InspectConfig.newBuilder().addCustomInfoTypes(customInfoType).build(); + + // Create the reidentification request object + DeidentifyConfig 
reidentifyConfig = + DeidentifyConfig.newBuilder() + .setInfoTypeTransformations(infoTypeTransformationArray) + .build(); + + ReidentifyContentRequest request = + ReidentifyContentRequest.newBuilder() + .setParent(ProjectName.of(projectId).toString()) + .setReidentifyConfig(reidentifyConfig) + .setInspectConfig(inspectConfig) + .setItem(contentItem) + .build(); + + // Execute the deidentification request + ReidentifyContentResponse response = dlpServiceClient.reidentifyContent(request); + + // Print the reidentified input value + // e.g. "My SSN is 7261298621" --> "My SSN is 123456789" + String result = response.getItem().getValue(); + System.out.println(result); + } catch (Exception e) { + System.out.println("Error in reidentifyWithFpe: " + e.getMessage()); + } + } + // [END dlp_reidentify_fpe] + + // [START dlp_deidentify_date_shift] + /** + * @param inputCsvPath The path to the CSV file to deidentify + * @param outputCsvPath (Optional) path to the output CSV file + * @param dateFields The list of (date) fields in the CSV file to date shift + * @param lowerBoundDays The maximum number of days to shift a date backward + * @param upperBoundDays The maximum number of days to shift a date forward + * @param contextFieldId (Optional) The column to determine date shift, default : a random shift + * amount + * @param wrappedKey (Optional) The encrypted ('wrapped') AES-256 key to use when shifting dates + * @param keyName (Optional) The name of the Cloud KMS key used to encrypt ('wrap') the AES-256 + * key + * @param projectId ID of Google Cloud project to run the API under. 
+ */ + private static void deidentifyWithDateShift( + Path inputCsvPath, + Path outputCsvPath, + String[] dateFields, + int lowerBoundDays, + int upperBoundDays, + String contextFieldId, + String wrappedKey, + String keyName, + String projectId) + throws Exception { + // instantiate a client + try (DlpServiceClient dlpServiceClient = DlpServiceClient.create()) { + + // Set the maximum days to shift a day backward (lowerbound), forward (upperbound) + DateShiftConfig.Builder dateShiftConfigBuilder = + DateShiftConfig.newBuilder() + .setLowerBoundDays(lowerBoundDays) + .setUpperBoundDays(upperBoundDays); + + // If contextFieldId, keyName or wrappedKey is set: all three arguments must be valid + if (contextFieldId != null && keyName != null && wrappedKey != null) { + dateShiftConfigBuilder.setContext(FieldId.newBuilder().setName(contextFieldId).build()); + KmsWrappedCryptoKey kmsWrappedCryptoKey = + KmsWrappedCryptoKey.newBuilder() + .setCryptoKeyName(keyName) + .setWrappedKey(ByteString.copyFrom(BaseEncoding.base64().decode(wrappedKey))) + .build(); + dateShiftConfigBuilder.setCryptoKey( + CryptoKey.newBuilder().setKmsWrapped(kmsWrappedCryptoKey).build()); + + } else if (contextFieldId != null || keyName != null || wrappedKey != null) { + throw new IllegalArgumentException( + "You must set either ALL or NONE of {contextFieldId, keyName, wrappedKey}!"); + } + + // Read and parse the CSV file + BufferedReader br = null; + String line; + List rows = new ArrayList<>(); + List headers; + + br = new BufferedReader(new FileReader(inputCsvPath.toFile())); + + // convert csv header to FieldId + headers = + Arrays.stream(br.readLine().split(",")) + .map(header -> FieldId.newBuilder().setName(header).build()) + .collect(Collectors.toList()); + + while ((line = br.readLine()) != null) { + // convert csv rows to Table.Row + rows.add(convertCsvRowToTableRow(line)); + } + br.close(); + + Table table = Table.newBuilder().addAllHeaders(headers).addAllRows(rows).build(); + + List 
dateFieldIds = + Arrays.stream(dateFields) + .map(field -> FieldId.newBuilder().setName(field).build()) + .collect(Collectors.toList()); + + DateShiftConfig dateShiftConfig = dateShiftConfigBuilder.build(); + + FieldTransformation fieldTransformation = + FieldTransformation.newBuilder() + .addAllFields(dateFieldIds) + .setPrimitiveTransformation( + PrimitiveTransformation.newBuilder().setDateShiftConfig(dateShiftConfig).build()) + .build(); + + DeidentifyConfig deidentifyConfig = + DeidentifyConfig.newBuilder() + .setRecordTransformations( + RecordTransformations.newBuilder() + .addFieldTransformations(fieldTransformation) + .build()) + .build(); + + ContentItem tableItem = ContentItem.newBuilder().setTable(table).build(); + + DeidentifyContentRequest request = + DeidentifyContentRequest.newBuilder() + .setParent(ProjectName.of(projectId).toString()) + .setDeidentifyConfig(deidentifyConfig) + .setItem(tableItem) + .build(); + + // Execute the deidentification request + DeidentifyContentResponse response = dlpServiceClient.deidentifyContent(request); + + // Write out the response as a CSV file + List outputHeaderFields = response.getItem().getTable().getHeadersList(); + List outputRows = response.getItem().getTable().getRowsList(); + + List outputHeaders = + outputHeaderFields.stream().map(FieldId::getName).collect(Collectors.toList()); + + File outputFile = outputCsvPath.toFile(); + if (!outputFile.exists()) { + outputFile.createNewFile(); + } + BufferedWriter bufferedWriter = new BufferedWriter(new FileWriter(outputFile)); + + // write out headers + bufferedWriter.append(String.join(",", outputHeaders) + "\n"); + + // write out each row + for (Table.Row outputRow : outputRows) { + String row = + outputRow + .getValuesList() + .stream() + .map(value -> value.getStringValue()) + .collect(Collectors.joining(",")); + bufferedWriter.append(row + "\n"); + } + + bufferedWriter.flush(); + bufferedWriter.close(); + + System.out.println("Successfully saved date-shift output 
to: " + outputCsvPath.getFileName()); + } catch (Exception e) { + System.out.println("Error in deidentifyWithDateShift: " + e.getMessage()); + } + } + + // Parse string to valid date, return null when invalid + private static LocalDate getValidDate(String dateString) { + try { + return LocalDate.parse(dateString); + } catch (DateTimeParseException e) { + return null; + } + } + + // convert CSV row into Table.Row + private static Table.Row convertCsvRowToTableRow(String row) { + String[] values = row.split(","); + Table.Row.Builder tableRowBuilder = Table.Row.newBuilder(); + for (String value : values) { + LocalDate date = getValidDate(value); + if (date != null) { + // convert to com.google.type.Date + Date dateValue = + Date.newBuilder() + .setYear(date.getYear()) + .setMonth(date.getMonthValue()) + .setDay(date.getDayOfMonth()) + .build(); + Value tableValue = Value.newBuilder().setDateValue(dateValue).build(); + tableRowBuilder.addValues(tableValue); + } else { + tableRowBuilder.addValues(Value.newBuilder().setStringValue(value).build()); + } + } + return tableRowBuilder.build(); + } + // [END dlp_deidentify_date_shift] /** - * Command line application to de-identify data using the Data Loss Prevention API. - * Supported data format: strings + * Command line application to de-identify data using the Data Loss Prevention API. 
Supported data + * format: strings */ public static void main(String[] args) throws Exception { OptionGroup optionsGroup = new OptionGroup(); optionsGroup.setRequired(true); - Option deidentifyMaskingOption = new Option("m", "mask", true, "deid with character masking"); + Option deidentifyMaskingOption = + new Option("m", "mask", true, "Deidentify with character masking."); optionsGroup.addOption(deidentifyMaskingOption); - Option deidentifyFpeOption = new Option("f", "fpe", true, "deid with FFX FPE"); + Option deidentifyFpeOption = + new Option("f", "fpe", true, "Deidentify with format-preserving encryption."); optionsGroup.addOption(deidentifyFpeOption); + Option reidentifyFpeOption = + new Option("r", "reid", true, "Reidentify with format-preserving encryption."); + optionsGroup.addOption(reidentifyFpeOption); + + Option deidentifyDateShiftOption = + new Option("d", "date", false, "Deidentify dates in a CSV file."); + optionsGroup.addOption(deidentifyDateShiftOption); + Options commandLineOptions = new Options(); commandLineOptions.addOptionGroup(optionsGroup); @@ -224,22 +517,47 @@ public static void main(String[] args) throws Exception { Option.builder("maskingCharacter").hasArg(true).required(false).build(); commandLineOptions.addOption(maskingCharacterOption); - Option numberToMaskOption = - Option.builder("numberToMask").hasArg(true).required(false).build(); + Option surrogateTypeOption = + Option.builder("surrogateType").hasArg(true).required(false).build(); + commandLineOptions.addOption(surrogateTypeOption); + + Option numberToMaskOption = Option.builder("numberToMask").hasArg(true).required(false).build(); commandLineOptions.addOption(numberToMaskOption); - Option alphabetOption = - Option.builder("commonAlphabet").hasArg(true).required(false).build(); + Option alphabetOption = Option.builder("commonAlphabet").hasArg(true).required(false).build(); commandLineOptions.addOption(alphabetOption); - Option wrappedKeyOption = - 
Option.builder("wrappedKey").hasArg(true).required(false).build(); + Option wrappedKeyOption = Option.builder("wrappedKey").hasArg(true).required(false).build(); commandLineOptions.addOption(wrappedKeyOption); - Option keyNameOption = - Option.builder("keyName").hasArg(true).required(false).build(); + Option keyNameOption = Option.builder("keyName").hasArg(true).required(false).build(); commandLineOptions.addOption(keyNameOption); + Option inputCsvPathOption = Option.builder("inputCsvPath").hasArg(true).required(false).build(); + commandLineOptions.addOption(inputCsvPathOption); + + Option outputCsvPathOption = + Option.builder("outputCsvPath").hasArg(true).required(false).build(); + commandLineOptions.addOption(outputCsvPathOption); + + Option dateFieldsOption = Option.builder("dateFields").hasArg(true).required(false).build(); + commandLineOptions.addOption(dateFieldsOption); + + Option lowerBoundDaysOption = + Option.builder("lowerBoundDays").hasArg(true).required(false).build(); + commandLineOptions.addOption(lowerBoundDaysOption); + + Option upperBoundDaysOption = + Option.builder("upperBoundDays").hasArg(true).required(false).build(); + commandLineOptions.addOption(upperBoundDaysOption); + + Option contextFieldNameOption = + Option.builder("contextField").hasArg(true).required(false).build(); + commandLineOptions.addOption(contextFieldNameOption); + + Option projectIdOption = Option.builder("projectId").hasArg(true).required(false).build(); + commandLineOptions.addOption(projectIdOption); + CommandLineParser parser = new DefaultParser(); HelpFormatter formatter = new HelpFormatter(); CommandLine cmd; @@ -253,22 +571,62 @@ public static void main(String[] args) throws Exception { return; } + // default to auto-detected project id when not explicitly provided + String projectId = + cmd.getOptionValue(projectIdOption.getOpt(), ServiceOptions.getDefaultProjectId()); + if (cmd.hasOption("m")) { // deidentification with character masking int numberToMask = 
Integer.parseInt(cmd.getOptionValue(numberToMaskOption.getOpt(), "0")); char maskingCharacter = cmd.getOptionValue(maskingCharacterOption.getOpt(), "*").charAt(0); String val = cmd.getOptionValue(deidentifyMaskingOption.getOpt()); - deIdentifyWithMask(val, maskingCharacter, numberToMask); + deIdentifyWithMask(val, maskingCharacter, numberToMask, projectId); } else if (cmd.hasOption("f")) { // deidentification with FPE String wrappedKey = cmd.getOptionValue(wrappedKeyOption.getOpt()); String keyName = cmd.getOptionValue(keyNameOption.getOpt()); String val = cmd.getOptionValue(deidentifyFpeOption.getOpt()); + String surrogateType = cmd.getOptionValue(surrogateTypeOption.getOpt()); + FfxCommonNativeAlphabet alphabet = + FfxCommonNativeAlphabet.valueOf( + cmd.getOptionValue( + alphabetOption.getOpt(), FfxCommonNativeAlphabet.ALPHA_NUMERIC.name())); + deIdentifyWithFpe(val, alphabet, keyName, wrappedKey, projectId, surrogateType); + } else if (cmd.hasOption("d")) { + //deidentify with date shift + String inputCsv = cmd.getOptionValue(inputCsvPathOption.getOpt()); + String outputCsv = cmd.getOptionValue(outputCsvPathOption.getOpt()); + + String contextField = cmd.getOptionValue(contextFieldNameOption.getOpt(), null); + String wrappedKey = cmd.getOptionValue(wrappedKeyOption.getOpt(), null); + String keyName = cmd.getOptionValue(keyNameOption.getOpt(), null); + + String[] dateFields = cmd.getOptionValue(dateFieldsOption.getOpt(), "").split(","); + + int lowerBoundsDay = Integer.valueOf(cmd.getOptionValue(lowerBoundDaysOption.getOpt())); + int upperBoundsDay = Integer.valueOf(cmd.getOptionValue(upperBoundDaysOption.getOpt())); + + deidentifyWithDateShift( + Paths.get(inputCsv), + Paths.get(outputCsv), + dateFields, + lowerBoundsDay, + upperBoundsDay, + contextField, + wrappedKey, + keyName, + projectId); + } else if (cmd.hasOption("r")) { + // reidentification with FPE + String wrappedKey = cmd.getOptionValue(wrappedKeyOption.getOpt()); + String keyName = 
cmd.getOptionValue(keyNameOption.getOpt()); + String val = cmd.getOptionValue(reidentifyFpeOption.getOpt()); + String surrogateType = cmd.getOptionValue(surrogateTypeOption.getOpt()); FfxCommonNativeAlphabet alphabet = FfxCommonNativeAlphabet.valueOf( cmd.getOptionValue( alphabetOption.getOpt(), FfxCommonNativeAlphabet.ALPHA_NUMERIC.name())); - deIdentifyWithFpe(val, alphabet, keyName, wrappedKey); + reIdentifyWithFpe(val, alphabet, keyName, wrappedKey, projectId, surrogateType); } } } diff --git a/dlp/src/main/java/com/example/dlp/Inspect.java b/dlp/src/main/java/com/example/dlp/Inspect.java index 1d2edf45027..d4adc6d33ba 100644 --- a/dlp/src/main/java/com/example/dlp/Inspect.java +++ b/dlp/src/main/java/com/example/dlp/Inspect.java @@ -16,37 +16,45 @@ package com.example.dlp; -import com.google.api.gax.longrunning.OperationFuture; +import com.google.api.core.SettableApiFuture; import com.google.cloud.ServiceOptions; -import com.google.cloud.dlp.v2beta1.DlpServiceClient; -import com.google.privacy.dlp.v2beta1.BigQueryOptions; -import com.google.privacy.dlp.v2beta1.BigQueryTable; -import com.google.privacy.dlp.v2beta1.CloudStorageOptions; -import com.google.privacy.dlp.v2beta1.CloudStorageOptions.FileSet; -import com.google.privacy.dlp.v2beta1.ContentItem; -import com.google.privacy.dlp.v2beta1.DatastoreOptions; -import com.google.privacy.dlp.v2beta1.Finding; -import com.google.privacy.dlp.v2beta1.InfoType; -import com.google.privacy.dlp.v2beta1.InspectConfig; -import com.google.privacy.dlp.v2beta1.InspectContentRequest; -import com.google.privacy.dlp.v2beta1.InspectContentResponse; -import com.google.privacy.dlp.v2beta1.InspectOperationMetadata; -import com.google.privacy.dlp.v2beta1.InspectOperationResult; -import com.google.privacy.dlp.v2beta1.InspectResult; -import com.google.privacy.dlp.v2beta1.KindExpression; -import com.google.privacy.dlp.v2beta1.Likelihood; -import com.google.privacy.dlp.v2beta1.OutputStorageConfig; -import 
com.google.privacy.dlp.v2beta1.PartitionId; -import com.google.privacy.dlp.v2beta1.ResultName; -import com.google.privacy.dlp.v2beta1.StorageConfig; +import com.google.cloud.dlp.v2.DlpServiceClient; +import com.google.cloud.pubsub.v1.Subscriber; +import com.google.privacy.dlp.v2.Action; +import com.google.privacy.dlp.v2.BigQueryOptions; +import com.google.privacy.dlp.v2.BigQueryTable; +import com.google.privacy.dlp.v2.ByteContentItem; +import com.google.privacy.dlp.v2.CloudStorageOptions; +import com.google.privacy.dlp.v2.ContentItem; +import com.google.privacy.dlp.v2.CreateDlpJobRequest; +import com.google.privacy.dlp.v2.DatastoreOptions; +import com.google.privacy.dlp.v2.DlpJob; +import com.google.privacy.dlp.v2.Finding; +import com.google.privacy.dlp.v2.GetDlpJobRequest; +import com.google.privacy.dlp.v2.InfoType; +import com.google.privacy.dlp.v2.InfoTypeStats; +import com.google.privacy.dlp.v2.InspectConfig; +import com.google.privacy.dlp.v2.InspectConfig.FindingLimits; +import com.google.privacy.dlp.v2.InspectContentRequest; +import com.google.privacy.dlp.v2.InspectContentResponse; +import com.google.privacy.dlp.v2.InspectDataSourceDetails; +import com.google.privacy.dlp.v2.InspectJobConfig; +import com.google.privacy.dlp.v2.InspectResult; +import com.google.privacy.dlp.v2.KindExpression; +import com.google.privacy.dlp.v2.Likelihood; +import com.google.privacy.dlp.v2.PartitionId; +import com.google.privacy.dlp.v2.ProjectName; +import com.google.privacy.dlp.v2.StorageConfig; import com.google.protobuf.ByteString; +import com.google.pubsub.v1.ProjectSubscriptionName; +import com.google.pubsub.v1.ProjectTopicName; import java.net.URLConnection; import java.nio.file.Files; -import java.nio.file.Path; import java.nio.file.Paths; import java.util.ArrayList; import java.util.Collections; import java.util.List; +import java.util.concurrent.TimeUnit; import javax.activation.MimetypesFileTypeMap; import org.apache.commons.cli.CommandLine; import 
org.apache.commons.cli.CommandLineParser; @@ -59,232 +67,308 @@ public class Inspect { + /** + * [START dlp_inspect_string] Inspect a text for given InfoTypes + * + * @param string String to instpect + * @param minLikelihood The minimum likelihood required before returning a match + * @param maxFindings The maximum number of findings to report (0 = server maximum) + * @param infoTypes The infoTypes of information to match + * @param includeQuote Whether to include the matching string + * @param projectId Google Cloud project ID + */ private static void inspectString( String string, Likelihood minLikelihood, int maxFindings, List infoTypes, - boolean includeQuote) { - // [START dlp_inspect_string] + boolean includeQuote, + String projectId) { // instantiate a client try (DlpServiceClient dlpServiceClient = DlpServiceClient.create()) { - - // The minimum likelihood required before returning a match - // minLikelihood = LIKELIHOOD_UNSPECIFIED; - - // The maximum number of findings to report (0 = server maximum) - // maxFindings = 0; - - // The infoTypes of information to match - // infoTypes = ['US_MALE_NAME', 'US_FEMALE_NAME']; - - // Whether to include the matching string - // includeQuote = true; + FindingLimits findingLimits = + FindingLimits.newBuilder().setMaxFindingsPerRequest(maxFindings).build(); InspectConfig inspectConfig = InspectConfig.newBuilder() .addAllInfoTypes(infoTypes) .setMinLikelihood(minLikelihood) - .setMaxFindings(maxFindings) + .setLimits(findingLimits) .setIncludeQuote(includeQuote) .build(); - // The string to inspect - // string = 'My name is Gary and my email is gary@example.com'; - ContentItem contentItem = - ContentItem.newBuilder().setType("text/plain").setValue(string).build(); + ByteContentItem byteContentItem = + ByteContentItem.newBuilder() + .setType(ByteContentItem.BytesType.TEXT_UTF8) + .setData(ByteString.copyFromUtf8(string)) + .build(); + + ContentItem contentItem = 
ContentItem.newBuilder().setByteItem(byteContentItem).build(); InspectContentRequest request = InspectContentRequest.newBuilder() + .setParent(ProjectName.of(projectId).toString()) .setInspectConfig(inspectConfig) - .addItems(contentItem) + .setItem(contentItem) .build(); InspectContentResponse response = dlpServiceClient.inspectContent(request); - for (InspectResult result : response.getResultsList()) { - if (result.getFindingsCount() > 0) { - System.out.println("Findings: "); - for (Finding finding : result.getFindingsList()) { - if (includeQuote) { - System.out.print("Quote: " + finding.getQuote()); - } - System.out.print("\tInfo type: " + finding.getInfoType().getName()); - System.out.println("\tLikelihood: " + finding.getLikelihood()); + if (response.getResult().getFindingsCount() > 0) { + System.out.println("Findings: "); + for (Finding finding : response.getResult().getFindingsList()) { + if (includeQuote) { + System.out.print("\tQuote: " + finding.getQuote()); } - } else { - System.out.println("No findings."); + System.out.print("\tInfo type: " + finding.getInfoType().getName()); + System.out.println("\tLikelihood: " + finding.getLikelihood()); } + } else { + System.out.println("No findings."); } } catch (Exception e) { System.out.println("Error in inspectString: " + e.getMessage()); } - // [END dlp_inspect_string] } + // [END dlp_inspect_string] + // [START dlp_inspect_file] + /** + * Inspect a local file + * + * @param filePath The path to a local file to inspect. Can be a text, JPG, or PNG file. 
+ * @param minLikelihood The minimum likelihood required before returning a match + * @param maxFindings The maximum number of findings to report (0 = server maximum) + * @param infoTypes The infoTypes of information to match + * @param includeQuote Whether to include the matching string + * @param projectId Google Cloud project ID + */ private static void inspectFile( String filePath, Likelihood minLikelihood, int maxFindings, List infoTypes, - boolean includeQuote) { - // [START dlp_inspect_file] + boolean includeQuote, + String projectId) { // Instantiates a client try (DlpServiceClient dlpServiceClient = DlpServiceClient.create()) { - // The path to a local file to inspect. Can be a text, JPG, or PNG file. - // fileName = 'path/to/image.png'; - - // The minimum likelihood required before returning a match - // minLikelihood = LIKELIHOOD_UNSPECIFIED; - - // The maximum number of findings to report (0 = server maximum) - // maxFindings = 0; - - // The infoTypes of information to match - // infoTypes = ['US_MALE_NAME', 'US_FEMALE_NAME']; - - // Whether to include the matching string - // includeQuote = true; - Path path = Paths.get(filePath); - // detect file mime type, default to application/octet-stream String mimeType = URLConnection.guessContentTypeFromName(filePath); if (mimeType == null) { mimeType = MimetypesFileTypeMap.getDefaultFileTypeMap().getContentType(filePath); } - if (mimeType == null) { - mimeType = "application/octet-stream"; + + ByteContentItem.BytesType bytesType; + switch (mimeType) { + case "image/jpeg": + bytesType = ByteContentItem.BytesType.IMAGE_JPEG; + break; + case "image/bmp": + bytesType = ByteContentItem.BytesType.IMAGE_BMP; + break; + case "image/png": + bytesType = ByteContentItem.BytesType.IMAGE_PNG; + break; + case "image/svg": + bytesType = ByteContentItem.BytesType.IMAGE_SVG; + break; + default: + bytesType = ByteContentItem.BytesType.BYTES_TYPE_UNSPECIFIED; + break; } - byte[] data = Files.readAllBytes(path); - ContentItem 
contentItem = - ContentItem.newBuilder().setType(mimeType).setData(ByteString.copyFrom(data)).build(); + byte[] data = Files.readAllBytes(Paths.get(filePath)); + ByteContentItem byteContentItem = + ByteContentItem.newBuilder() + .setType(bytesType) + .setData(ByteString.copyFrom(data)) + .build(); + ContentItem contentItem = ContentItem.newBuilder().setByteItem(byteContentItem).build(); + + FindingLimits findingLimits = + FindingLimits.newBuilder().setMaxFindingsPerRequest(maxFindings).build(); InspectConfig inspectConfig = InspectConfig.newBuilder() .addAllInfoTypes(infoTypes) .setMinLikelihood(minLikelihood) - .setMaxFindings(maxFindings) + .setLimits(findingLimits) .setIncludeQuote(includeQuote) .build(); InspectContentRequest request = InspectContentRequest.newBuilder() + .setParent(ProjectName.of(projectId).toString()) .setInspectConfig(inspectConfig) - .addItems(contentItem) + .setItem(contentItem) .build(); + InspectContentResponse response = dlpServiceClient.inspectContent(request); - for (InspectResult result : response.getResultsList()) { - if (result.getFindingsCount() > 0) { - System.out.println("Findings: "); - for (Finding finding : result.getFindingsList()) { - if (includeQuote) { - System.out.print("Quote: " + finding.getQuote()); - } - System.out.print("\tInfo type: " + finding.getInfoType().getName()); - System.out.println("\tLikelihood: " + finding.getLikelihood()); + InspectResult result = response.getResult(); + if (result.getFindingsCount() > 0) { + System.out.println("Findings: "); + for (Finding finding : result.getFindingsList()) { + if (includeQuote) { + System.out.print("\tQuote: " + finding.getQuote()); } - } else { - System.out.println("No findings."); + System.out.print("\tInfo type: " + finding.getInfoType().getName()); + System.out.println("\tLikelihood: " + finding.getLikelihood()); } + } else { + System.out.println("No findings."); } } catch (Exception e) { - e.printStackTrace(); System.out.println("Error in inspectFile: " + 
e.getMessage()); } - // [END dlp_inspect_file] } + // [END dlp_inspect_file] + // [START dlp_inspect_gcs] + /** + * Inspect GCS file for Info types and wait on job completion using Google Cloud Pub/Sub + * notification + * + * @param bucketName The name of the bucket where the file resides. + * @param fileName The path to the file within the bucket to inspect (can include wildcards, eg. + * my-image.*) + * @param minLikelihood The minimum likelihood required before returning a match + * @param infoTypes The infoTypes of information to match + * @param maxFindings The maximum number of findings to report (0 = server maximum) + * @param topicId Google Cloud Pub/Sub topic Id to notify of job status + * @param subscriptionId Google Cloud Subscription to above topic to listen for job status updates + * @param projectId Google Cloud project ID + */ private static void inspectGcsFile( - String bucketName, String fileName, Likelihood minLikelihood, List infoTypes) + String bucketName, + String fileName, + Likelihood minLikelihood, + List infoTypes, + int maxFindings, + String topicId, + String subscriptionId, + String projectId) throws Exception { - // [START dlp_inspect_gcs] // Instantiates a client try (DlpServiceClient dlpServiceClient = DlpServiceClient.create()) { - // The name of the bucket where the file resides. - // bucketName = 'YOUR-BUCKET'; - - // The path to the file within the bucket to inspect. - // Can contain wildcards, e.g. 
"my-image.*" - // fileName = 'my-image.png'; - - // The minimum likelihood required before returning a match - // minLikelihood = LIKELIHOOD_UNSPECIFIED; - - // The maximum number of findings to report (0 = server maximum) - // maxFindings = 0; - - // The infoTypes of information to match - // infoTypes = ['US_MALE_NAME', 'US_FEMALE_NAME']; CloudStorageOptions cloudStorageOptions = CloudStorageOptions.newBuilder() - .setFileSet(FileSet.newBuilder().setUrl("gs://" + bucketName + "/" + fileName)) + .setFileSet( + CloudStorageOptions.FileSet.newBuilder() + .setUrl("gs://" + bucketName + "/" + fileName)) .build(); StorageConfig storageConfig = StorageConfig.newBuilder().setCloudStorageOptions(cloudStorageOptions).build(); + FindingLimits findingLimits = + FindingLimits.newBuilder().setMaxFindingsPerRequest(maxFindings).build(); + InspectConfig inspectConfig = InspectConfig.newBuilder() .addAllInfoTypes(infoTypes) .setMinLikelihood(minLikelihood) + .setLimits(findingLimits) .build(); - // optionally provide an output configuration to store results, default : none - OutputStorageConfig outputConfig = OutputStorageConfig.getDefaultInstance(); + String pubSubTopic = String.format("projects/%s/topics/%s", projectId, topicId); + Action.PublishToPubSub publishToPubSub = + Action.PublishToPubSub.newBuilder().setTopic(pubSubTopic).build(); - // asynchronously submit an inspect operation - OperationFuture responseFuture = - dlpServiceClient.createInspectOperationAsync(inspectConfig, storageConfig, outputConfig); + Action action = Action.newBuilder().setPubSub(publishToPubSub).build(); - // ... 
- // block on response, returning job id of the operation - InspectOperationResult inspectOperationResult = responseFuture.get(); - String resultName = inspectOperationResult.getName(); - InspectResult inspectResult = dlpServiceClient.listInspectFindings(resultName).getResult(); + InspectJobConfig inspectJobConfig = + InspectJobConfig.newBuilder() + .setStorageConfig(storageConfig) + .setInspectConfig(inspectConfig) + .addActions(action) + .build(); + + // Semi-synchronously submit an inspect job, and wait on results + CreateDlpJobRequest createDlpJobRequest = + CreateDlpJobRequest.newBuilder() + .setParent(ProjectName.of(projectId).toString()) + .setInspectJob(inspectJobConfig) + .build(); + + DlpJob dlpJob = dlpServiceClient.createDlpJob(createDlpJobRequest); + + System.out.println("Job created with ID:" + dlpJob.getName()); + + final SettableApiFuture done = SettableApiFuture.create(); + + // Set up a Pub/Sub subscriber to listen on the job completion status + Subscriber subscriber = + Subscriber.newBuilder( + ProjectSubscriptionName.of(projectId, subscriptionId), + (pubsubMessage, ackReplyConsumer) -> { + if (pubsubMessage.getAttributesCount() > 0 + && pubsubMessage + .getAttributesMap() + .get("DlpJobName") + .equals(dlpJob.getName())) { + // notify job completion + done.set(true); + ackReplyConsumer.ack(); + } + }) + .build(); + subscriber.startAsync(); + + // Wait for job completion semi-synchronously + // For long jobs, consider using a truly asynchronous execution model such as Cloud Functions + try { + done.get(1, TimeUnit.MINUTES); + Thread.sleep(500); // Wait for the job to become available + } catch (Exception e) { + System.out.println("Unable to verify job completion."); + } + + DlpJob completedJob = + dlpServiceClient.getDlpJob( + GetDlpJobRequest.newBuilder().setName(dlpJob.getName()).build()); - if (inspectResult.getFindingsCount() > 0) { + System.out.println("Job status: " + completedJob.getState()); + InspectDataSourceDetails 
inspectDataSourceDetails = completedJob.getInspectDetails(); + InspectDataSourceDetails.Result result = inspectDataSourceDetails.getResult(); + if (result.getInfoTypeStatsCount() > 0) { System.out.println("Findings: "); - for (Finding finding : inspectResult.getFindingsList()) { - System.out.print("\tInfo type: " + finding.getInfoType().getName()); - System.out.println("\tLikelihood: " + finding.getLikelihood()); + for (InfoTypeStats infoTypeStat : result.getInfoTypeStatsList()) { + System.out.print("\tInfo type: " + infoTypeStat.getInfoType().getName()); + System.out.println("\tCount: " + infoTypeStat.getCount()); } } else { System.out.println("No findings."); } - } catch (Exception e) { - e.printStackTrace(); - System.out.println("Error in inspectGCSFileAsync: " + e.getMessage()); } - // [END dlp_inspect_gcs] } + // [END dlp_inspect_gcs] + // [START dlp_inspect_datastore] + /** + * Inspect a Datastore kind + * + * @param projectId The project ID containing the target Datastore + * @param namespaceId The ID namespace of the Datastore document to inspect + * @param kind The kind of the Datastore entity to inspect + * @param minLikelihood The minimum likelihood required before returning a match + * @param infoTypes The infoTypes of information to match + * @param maxFindings max number of findings + * @param topicId Google Cloud Pub/Sub topic to notify job status updates + * @param subscriptionId Google Cloud Pub/Sub subscription to above topic to receive status + * updates + */ private static void inspectDatastore( String projectId, String namespaceId, String kind, Likelihood minLikelihood, - List infoTypes) { - // [START dlp_inspect_datastore] + List infoTypes, + int maxFindings, + String topicId, + String subscriptionId) { // Instantiates a client try (DlpServiceClient dlpServiceClient = DlpServiceClient.create()) { - // (Optional) The project ID containing the target Datastore - // projectId = my-project-id - - // (Optional) The ID namespace of the Datastore 
document to inspect. - // To ignore Datastore namespaces, set this to an empty string ('') - // namespaceId = ''; - - // The kind of the Datastore entity to inspect. - // kind = 'Person'; - - // The minimum likelihood required before returning a match - // minLikelihood = LIKELIHOOD_UNSPECIFIED; - - // The infoTypes of information to match - // infoTypes = ['US_MALE_NAME', 'US_FEMALE_NAME']; - // Reference to the Datastore namespace PartitionId partitionId = PartitionId.newBuilder().setProjectId(projectId).setNamespaceId(namespaceId).build(); @@ -298,123 +382,217 @@ private static void inspectDatastore( StorageConfig storageConfig = StorageConfig.newBuilder().setDatastoreOptions(datastoreOptions).build(); + FindingLimits findingLimits = + FindingLimits.newBuilder().setMaxFindingsPerRequest(maxFindings).build(); + InspectConfig inspectConfig = InspectConfig.newBuilder() .addAllInfoTypes(infoTypes) .setMinLikelihood(minLikelihood) + .setLimits(findingLimits) .build(); - // optionally provide an output configuration to store results, default : none - OutputStorageConfig outputConfig = OutputStorageConfig.getDefaultInstance(); + String pubSubTopic = String.format("projects/%s/topics/%s", projectId, topicId); + Action.PublishToPubSub publishToPubSub = + Action.PublishToPubSub.newBuilder().setTopic(pubSubTopic).build(); - // asynchronously submit an inspect operation - OperationFuture responseFuture = - dlpServiceClient.createInspectOperationAsync(inspectConfig, storageConfig, outputConfig); + Action action = Action.newBuilder().setPubSub(publishToPubSub).build(); - // ... 
- // block on response, returning job id of the operation - InspectOperationResult inspectOperationResult = responseFuture.get(); - String resultName = inspectOperationResult.getName(); - InspectResult inspectResult = dlpServiceClient.listInspectFindings(resultName).getResult(); + InspectJobConfig inspectJobConfig = + InspectJobConfig.newBuilder() + .setStorageConfig(storageConfig) + .setInspectConfig(inspectConfig) + .addActions(action) + .build(); + + // Asynchronously submit an inspect job, and wait on results + CreateDlpJobRequest createDlpJobRequest = + CreateDlpJobRequest.newBuilder() + .setParent(ProjectName.of(projectId).toString()) + .setInspectJob(inspectJobConfig) + .build(); + + DlpJob dlpJob = dlpServiceClient.createDlpJob(createDlpJobRequest); + + System.out.println("Job created with ID:" + dlpJob.getName()); + + final SettableApiFuture done = SettableApiFuture.create(); + + // Set up a Pub/Sub subscriber to listen on the job completion status + Subscriber subscriber = + Subscriber.newBuilder( + ProjectSubscriptionName.of(projectId, subscriptionId), + (pubsubMessage, ackReplyConsumer) -> { + if (pubsubMessage.getAttributesCount() > 0 + && pubsubMessage + .getAttributesMap() + .get("DlpJobName") + .equals(dlpJob.getName())) { + // notify job completion + done.set(true); + ackReplyConsumer.ack(); + } + }) + .build(); + subscriber.startAsync(); + + // Wait for job completion semi-synchronously + // For long jobs, consider using a truly asynchronous execution model such as Cloud Functions + try { + done.get(1, TimeUnit.MINUTES); + Thread.sleep(500); // Wait for the job to become available + } catch (Exception e) { + System.out.println("Unable to verify job completion."); + } - if (inspectResult.getFindingsCount() > 0) { + DlpJob completedJob = + dlpServiceClient.getDlpJob( + GetDlpJobRequest.newBuilder().setName(dlpJob.getName()).build()); + + System.out.println("Job status: " + completedJob.getState()); + InspectDataSourceDetails inspectDataSourceDetails 
= completedJob.getInspectDetails(); + InspectDataSourceDetails.Result result = inspectDataSourceDetails.getResult(); + if (result.getInfoTypeStatsCount() > 0) { System.out.println("Findings: "); - for (Finding finding : inspectResult.getFindingsList()) { - System.out.print("\tInfo type: " + finding.getInfoType().getName()); - System.out.println("\tLikelihood: " + finding.getLikelihood()); + for (InfoTypeStats infoTypeStat : result.getInfoTypeStatsList()) { + System.out.print("\tInfo type: " + infoTypeStat.getInfoType().getName()); + System.out.println("\tCount: " + infoTypeStat.getCount()); } } else { System.out.println("No findings."); } } catch (Exception e) { - e.printStackTrace(); - System.out.println("Error in inspectDatastore: " + e.getMessage()); + System.out.println("inspectDatastore Problems: " + e.getMessage()); } - // [END dlp_inspect_datastore] } + // [END dlp_inspect_datastore] + // [START dlp_inspect_bigquery] + /** + * Inspect a BigQuery table + * + * @param projectId The project ID to run the API call under + * @param datasetId The ID of the dataset to inspect, e.g. 'my_dataset' + * @param tableId The ID of the table to inspect, e.g. 'my_table' + * @param minLikelihood The minimum likelihood required before returning a match + * @param infoTypes The infoTypes of information to match + * @param maxFindings The maximum number of findings to report (0 = server maximum) + * @param topicId Topic ID for pubsub. + * @param subscriptionId Subscription ID for pubsub. 
+ */ private static void inspectBigquery( - String projectId, - String datasetId, - String tableId, - Likelihood minLikelihood, - List infoTypes) { - // [START dlp_inspect_bigquery] + String projectId, + String datasetId, + String tableId, + Likelihood minLikelihood, + List infoTypes, + int maxFindings, + String topicId, + String subscriptionId) { // Instantiates a client try (DlpServiceClient dlpServiceClient = DlpServiceClient.create()) { - - // (Optional) The project ID to run the API call under - // projectId = my-project-id - - // The ID of the dataset to inspect, e.g. 'my_dataset' - // datasetId = "my_dataset"; - - // The ID of the table to inspect, e.g. 'my_table' - // tableId = "my_table"; - - // The minimum likelihood required before returning a match - // minLikelihood = LIKELIHOOD_UNSPECIFIED; - - // The infoTypes of information to match - // infoTypes = ['US_MALE_NAME', 'US_FEMALE_NAME']; - // Reference to the BigQuery table BigQueryTable tableReference = - BigQueryTable.newBuilder() - .setProjectId(projectId) - .setDatasetId(datasetId) - .setTableId(tableId) - .build(); + BigQueryTable.newBuilder() + .setProjectId(projectId) + .setDatasetId(datasetId) + .setTableId(tableId) + .build(); BigQueryOptions bigQueryOptions = - BigQueryOptions.newBuilder() - .setTableReference(tableReference) - .build(); + BigQueryOptions.newBuilder().setTableReference(tableReference).build(); // Construct BigQuery configuration to be inspected StorageConfig storageConfig = - StorageConfig.newBuilder() - .setBigQueryOptions(bigQueryOptions) - .build(); + StorageConfig.newBuilder().setBigQueryOptions(bigQueryOptions).build(); + + FindingLimits findingLimits = + FindingLimits.newBuilder().setMaxFindingsPerRequest(maxFindings).build(); InspectConfig inspectConfig = - InspectConfig.newBuilder() - .addAllInfoTypes(infoTypes) - .setMinLikelihood(minLikelihood) - .build(); - - // optionally provide an output configuration to store results, default : none - OutputStorageConfig 
outputConfig = OutputStorageConfig.getDefaultInstance(); - - // asynchronously submit an inspect operation - OperationFuture responseFuture = - dlpServiceClient.createInspectOperationAsync( - inspectConfig, storageConfig, outputConfig); - - // ... - // block on response, returning job id of the operation - InspectOperationResult inspectOperationResult = responseFuture.get(); - String resultName = inspectOperationResult.getName(); - InspectResult inspectResult = dlpServiceClient.listInspectFindings(resultName).getResult(); - - if (inspectResult.getFindingsCount() > 0) { + InspectConfig.newBuilder() + .addAllInfoTypes(infoTypes) + .setMinLikelihood(minLikelihood) + .setLimits(findingLimits) + .build(); + + ProjectTopicName topic = ProjectTopicName.of(projectId, topicId); + Action.PublishToPubSub publishToPubSub = + Action.PublishToPubSub.newBuilder().setTopic(topic.toString()).build(); + + Action action = Action.newBuilder().setPubSub(publishToPubSub).build(); + + InspectJobConfig inspectJobConfig = + InspectJobConfig.newBuilder() + .setStorageConfig(storageConfig) + .setInspectConfig(inspectConfig) + .addActions(action) + .build(); + + // Asynchronously submit an inspect job, and wait on results + CreateDlpJobRequest createDlpJobRequest = + CreateDlpJobRequest.newBuilder() + .setParent(ProjectName.of(projectId).toString()) + .setInspectJob(inspectJobConfig) + .build(); + + DlpJob dlpJob = dlpServiceClient.createDlpJob(createDlpJobRequest); + + System.out.println("Job created with ID:" + dlpJob.getName()); + + // Wait for job completion semi-synchronously + // For long jobs, consider using a truly asynchronous execution model such as Cloud Functions + final SettableApiFuture done = SettableApiFuture.create(); + + // Set up a Pub/Sub subscriber to listen on the job completion status + Subscriber subscriber = + Subscriber.newBuilder( + ProjectSubscriptionName.of(projectId, subscriptionId), + (pubsubMessage, ackReplyConsumer) -> { + if 
(pubsubMessage.getAttributesCount() > 0 + && pubsubMessage + .getAttributesMap() + .get("DlpJobName") + .equals(dlpJob.getName())) { + // notify job completion + done.set(true); + ackReplyConsumer.ack(); + } + }) + .build(); + subscriber.startAsync(); + + try { + done.get(1, TimeUnit.MINUTES); + Thread.sleep(500); // Wait for the job to become available + } catch (Exception e) { + System.out.println("Unable to verify job completion."); + } + + DlpJob completedJob = + dlpServiceClient.getDlpJob( + GetDlpJobRequest.newBuilder().setName(dlpJob.getName()).build()); + + System.out.println("Job status: " + completedJob.getState()); + InspectDataSourceDetails inspectDataSourceDetails = completedJob.getInspectDetails(); + InspectDataSourceDetails.Result result = inspectDataSourceDetails.getResult(); + if (result.getInfoTypeStatsCount() > 0) { System.out.println("Findings: "); - for (Finding finding : inspectResult.getFindingsList()) { - System.out.print("\tInfo type: " + finding.getInfoType().getName()); - System.out.println("\tLikelihood: " + finding.getLikelihood()); + for (InfoTypeStats infoTypeStat : result.getInfoTypeStatsList()) { + System.out.print("\tInfo type: " + infoTypeStat.getInfoType().getName()); + System.out.println("\tCount: " + infoTypeStat.getCount()); } } else { System.out.println("No findings."); } } catch (Exception e) { - e.printStackTrace(); - System.out.println("Error in inspectBigguery: " + e.getMessage()); + System.out.println("inspectBigquery Problems: " + e.getMessage()); } - // [END dlp_inspect_bigquery] } + // [END dlp_inspect_bigquery] /** - * Command line application to inspect data using the Data Loss Prevention API. - * Supported data formats: string, file, text file on GCS, BigQuery table, and Datastore entity + * Command line application to inspect data using the Data Loss Prevention API. 
Supported data + * formats: string, file, text file on GCS, BigQuery table, and Datastore entity */ public static void main(String[] args) throws Exception { @@ -466,10 +644,16 @@ public static void main(String[] args) throws Exception { Option tableIdOption = Option.builder("tableId").hasArg(true).required(false).build(); commandLineOptions.addOption(tableIdOption); - Option projectIdOption = - Option.builder("projectId").hasArg(true).required(false).build(); + Option projectIdOption = Option.builder("projectId").hasArg(true).required(false).build(); commandLineOptions.addOption(projectIdOption); + Option topicIdOption = Option.builder("topicId").hasArg(true).required(false).build(); + commandLineOptions.addOption(topicIdOption); + + Option subscriptionIdOption = + Option.builder("subscriptionId").hasArg(true).required(false).build(); + commandLineOptions.addOption(subscriptionIdOption); + Option datastoreNamespaceOption = Option.builder("namespace").hasArg(true).required(false).build(); commandLineOptions.addOption(datastoreNamespaceOption); @@ -498,6 +682,11 @@ public static void main(String[] args) throws Exception { boolean includeQuote = Boolean.parseBoolean(cmd.getOptionValue(includeQuoteOption.getOpt(), "true")); + String projectId = + cmd.getOptionValue(projectIdOption.getOpt(), ServiceOptions.getDefaultProjectId()); + String topicId = cmd.getOptionValue(topicIdOption.getOpt()); + String subscriptionId = cmd.getOptionValue(subscriptionIdOption.getOpt()); + List infoTypesList = Collections.emptyList(); if (cmd.hasOption(infoTypesOption.getOpt())) { infoTypesList = new ArrayList<>(); @@ -509,32 +698,50 @@ public static void main(String[] args) throws Exception { // string inspection if (cmd.hasOption("s")) { String val = cmd.getOptionValue(stringOption.getOpt()); - inspectString(val, minLikelihood, maxFindings, infoTypesList, includeQuote); + inspectString(val, minLikelihood, maxFindings, infoTypesList, includeQuote, projectId); } else if 
(cmd.hasOption("f")) { String filePath = cmd.getOptionValue(fileOption.getOpt()); - inspectFile(filePath, minLikelihood, maxFindings, infoTypesList, includeQuote); + inspectFile(filePath, minLikelihood, maxFindings, infoTypesList, includeQuote, projectId); // gcs file inspection } else if (cmd.hasOption("gcs")) { String bucketName = cmd.getOptionValue(bucketNameOption.getOpt()); String fileName = cmd.getOptionValue(gcsFileNameOption.getOpt()); - inspectGcsFile(bucketName, fileName, minLikelihood, infoTypesList); + inspectGcsFile( + bucketName, + fileName, + minLikelihood, + infoTypesList, + maxFindings, + topicId, + subscriptionId, + projectId); // datastore kind inspection } else if (cmd.hasOption("ds")) { String namespaceId = cmd.getOptionValue(datastoreNamespaceOption.getOpt(), ""); String kind = cmd.getOptionValue(datastoreKindOption.getOpt()); // use default project id when project id is not specified - String projectId = - cmd.getOptionValue( - projectIdOption.getOpt(), ServiceOptions.getDefaultProjectId()); - inspectDatastore(projectId, namespaceId, kind, minLikelihood, infoTypesList); + inspectDatastore( + projectId, + namespaceId, + kind, + minLikelihood, + infoTypesList, + maxFindings, + topicId, + subscriptionId); } else if (cmd.hasOption("bq")) { String datasetId = cmd.getOptionValue(datasetIdOption.getOpt()); String tableId = cmd.getOptionValue(tableIdOption.getOpt()); // use default project id when project id is not specified - String projectId = - cmd.getOptionValue( - projectIdOption.getOpt(), ServiceOptions.getDefaultProjectId()); - inspectBigquery(projectId, datasetId, tableId, minLikelihood, infoTypesList); + inspectBigquery( + projectId, + datasetId, + tableId, + minLikelihood, + infoTypesList, + maxFindings, + topicId, + subscriptionId); } } } diff --git a/dlp/src/main/java/com/example/dlp/Jobs.java b/dlp/src/main/java/com/example/dlp/Jobs.java new file mode 100644 index 00000000000..43ed11dd4ab --- /dev/null +++ 
b/dlp/src/main/java/com/example/dlp/Jobs.java @@ -0,0 +1,147 @@ +/* + * Copyright 2018 Google Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package com.example.dlp; + +import com.google.cloud.ServiceOptions; +import com.google.cloud.dlp.v2.DlpServiceClient; +import com.google.privacy.dlp.v2.DeleteDlpJobRequest; +import com.google.privacy.dlp.v2.DlpJob; +import com.google.privacy.dlp.v2.DlpJobName; +import com.google.privacy.dlp.v2.DlpJobType; +import com.google.privacy.dlp.v2.ListDlpJobsRequest; +import com.google.privacy.dlp.v2.ProjectName; +import org.apache.commons.cli.CommandLine; +import org.apache.commons.cli.CommandLineParser; +import org.apache.commons.cli.DefaultParser; +import org.apache.commons.cli.HelpFormatter; +import org.apache.commons.cli.Option; +import org.apache.commons.cli.OptionGroup; +import org.apache.commons.cli.Options; +import org.apache.commons.cli.ParseException; + +public class Jobs { + + // [START dlp_list_jobs] + /* + * List DLP jobs + * + * @param projectId The project ID to run the API call under + * @param filter The filter expression to use, eg. 
state=DONE For more information on filter + * syntax see https://cloud.google.com/dlp/docs/reference/rest/v2/projects.dlpJobs/list + * @param jobType The type of job to list (either 'INSPECT_JOB' or 'RISK_ANALYSIS_JOB') + */ + private static void listJobs(String projectId, String filter, DlpJobType jobType) + throws Exception { + try (DlpServiceClient dlpServiceClient = DlpServiceClient.create()) { + ListDlpJobsRequest listDlpJobsRequest = + ListDlpJobsRequest.newBuilder() + .setParent(ProjectName.of(projectId).toString()) + .setFilter(filter) + .setType(jobType) + .build(); + DlpServiceClient.ListDlpJobsPagedResponse response = + dlpServiceClient.listDlpJobs(listDlpJobsRequest); + for (DlpJob dlpJob : response.getPage().getValues()) { + System.out.println(dlpJob.getName() + " -- " + dlpJob.getState()); + } + } + } + // [END dlp_list_jobs] + + /** + * Delete a DLP Job + * + * @param projectId Google Cloud ProjectID + * @param jobId DLP Job ID + */ + // [START dlp_delete_job] + private static void deleteJob(String projectId, String jobId) { + + try (DlpServiceClient dlpServiceClient = DlpServiceClient.create()) { + // construct complete job name + DlpJobName job = DlpJobName.of(projectId, jobId); + + DeleteDlpJobRequest deleteDlpJobRequest = + DeleteDlpJobRequest.newBuilder().setName(job.toString()).build(); + + // submit job deletion request + dlpServiceClient.deleteDlpJob(deleteDlpJobRequest); + + System.out.println("Job deleted successfully."); + } catch (Exception e) { + System.err.println("Error deleting DLP job: " + e.getMessage()); + } + } + // [END dlp_delete_job] + + /** Command line application to list and delete DLP jobs the Data Loss Prevention API. 
*/ + public static void main(String[] args) throws Exception { + + OptionGroup optionsGroup = new OptionGroup(); + optionsGroup.setRequired(true); + Option listOption = new Option("l", "list", false, "List DLP Jobs"); + optionsGroup.addOption(listOption); + + Option deleteOption = new Option("d", "delete", false, "Delete DLP Jobs"); + optionsGroup.addOption(deleteOption); + + Options commandLineOptions = new Options(); + commandLineOptions.addOptionGroup(optionsGroup); + + Option projectIdOption = Option.builder("projectId").hasArg(true).required(false).build(); + commandLineOptions.addOption(projectIdOption); + + Option filterOption = Option.builder("filter").hasArg(true).required(false).build(); + commandLineOptions.addOption(filterOption); + + Option jobTypeOption = Option.builder("jobType").hasArg(true).required(false).build(); + commandLineOptions.addOption(jobTypeOption); + + Option jobIdOption = Option.builder("jobId").hasArg(true).required(false).build(); + commandLineOptions.addOption(jobIdOption); + + CommandLineParser parser = new DefaultParser(); + HelpFormatter formatter = new HelpFormatter(); + CommandLine cmd; + + try { + cmd = parser.parse(commandLineOptions, args); + } catch (ParseException e) { + System.out.println(e.getMessage()); + formatter.printHelp(Inspect.class.getName(), commandLineOptions); + System.exit(1); + return; + } + + String projectId = + cmd.getOptionValue(projectIdOption.getOpt(), ServiceOptions.getDefaultProjectId()); + + if (cmd.hasOption(listOption.getOpt())) { + String filter = cmd.getOptionValue(filterOption.getOpt(), ""); + DlpJobType jobType = + DlpJobType.valueOf( + cmd.getOptionValue( + jobTypeOption.getOpt(), DlpJobType.DLP_JOB_TYPE_UNSPECIFIED.name())); + listJobs(projectId, filter, jobType); + } + + if (cmd.hasOption(deleteOption.getOpt())) { + String jobId = cmd.getOptionValue(jobIdOption.getOpt()); + deleteJob(projectId, jobId); + } + } +} diff --git a/dlp/src/main/java/com/example/dlp/Metadata.java 
b/dlp/src/main/java/com/example/dlp/Metadata.java index 12702284108..3b7a85aec5b 100644 --- a/dlp/src/main/java/com/example/dlp/Metadata.java +++ b/dlp/src/main/java/com/example/dlp/Metadata.java @@ -16,11 +16,10 @@ package com.example.dlp; -import com.google.cloud.dlp.v2beta1.DlpServiceClient; -import com.google.privacy.dlp.v2beta1.CategoryDescription; -import com.google.privacy.dlp.v2beta1.InfoTypeDescription; -import com.google.privacy.dlp.v2beta1.ListInfoTypesResponse; -import com.google.privacy.dlp.v2beta1.ListRootCategoriesResponse; +import com.google.cloud.dlp.v2.DlpServiceClient; +import com.google.privacy.dlp.v2.InfoTypeDescription; +import com.google.privacy.dlp.v2.ListInfoTypesRequest; +import com.google.privacy.dlp.v2.ListInfoTypesResponse; import java.util.List; import org.apache.commons.cli.CommandLine; import org.apache.commons.cli.CommandLineParser; @@ -32,48 +31,37 @@ public class Metadata { - private static void listInfoTypes(String category, String languageCode) throws Exception { - // [START dlp_list_info_types] + // [START dlp_list_info_types] + /* + * List the types of sensitive information the DLP API supports. + * + * @param filter The filter to use, e.g. "supported_by=INSPECT" + * @param languageCode The BCP-47 language code to use, e.g. 'en-US' + */ + private static void listInfoTypes(String filter, String languageCode) throws Exception { + // Instantiate a DLP client try (DlpServiceClient dlpClient = DlpServiceClient.create()) { - // The category of info types to list, e.g. category = 'GOVERNMENT'; - // Optional BCP-47 language code for localized info type friendly names, e.g. 
'en-US' - ListInfoTypesResponse infoTypesResponse = dlpClient.listInfoTypes(category, languageCode); + ListInfoTypesRequest listInfoTypesRequest = + ListInfoTypesRequest.newBuilder().setFilter(filter).setLanguageCode(languageCode).build(); + ListInfoTypesResponse infoTypesResponse = dlpClient.listInfoTypes(listInfoTypesRequest); List infoTypeDescriptions = infoTypesResponse.getInfoTypesList(); for (InfoTypeDescription infoTypeDescription : infoTypeDescriptions) { System.out.println("Name : " + infoTypeDescription.getName()); System.out.println("Display name : " + infoTypeDescription.getDisplayName()); } } - // [END dlp_list_info_types] - } - - private static void listRootCategories(String languageCode) throws Exception { - // [START dlp_list_categories] - // Instantiate a DLP client - try (DlpServiceClient dlpClient = DlpServiceClient.create()) { - // The BCP-47 language code to use, e.g. 'en-US' - // languageCode = 'en-US' - ListRootCategoriesResponse rootCategoriesResponse = - dlpClient.listRootCategories(languageCode); - for (CategoryDescription categoryDescription : rootCategoriesResponse.getCategoriesList()) { - System.out.println("Name : " + categoryDescription.getName()); - System.out.println("Display name : " + categoryDescription.getDisplayName()); - } - } - // [END dlp_list_categories] } + // [END dlp_list_info_types] /** Retrieve infoTypes. 
*/ public static void main(String[] args) throws Exception { Options options = new Options(); - Option languageCodeOption = new Option("language", null, true, "BCP-47 language code"); - languageCodeOption.setRequired(false); + Option languageCodeOption = Option.builder("language").hasArg(true).required(false).build(); options.addOption(languageCodeOption); - Option categoryOption = new Option("category", null, true, "Category of info types to list."); - categoryOption.setRequired(false); - options.addOption(categoryOption); + Option filterOption = Option.builder("filter").hasArg(true).required(false).build(); + options.addOption(filterOption); CommandLineParser parser = new DefaultParser(); HelpFormatter formatter = new HelpFormatter(); @@ -87,11 +75,8 @@ public static void main(String[] args) throws Exception { return; } String languageCode = cmd.getOptionValue(languageCodeOption.getOpt(), "en-US"); - if (cmd.hasOption(categoryOption.getOpt())) { - String category = cmd.getOptionValue(categoryOption.getOpt()); - listInfoTypes(category, languageCode); - } else { - listRootCategories(languageCode); - } + String filter = cmd.getOptionValue(filterOption.getOpt(), ""); + + listInfoTypes(filter, languageCode); } } diff --git a/dlp/src/main/java/com/example/dlp/QuickStart.java b/dlp/src/main/java/com/example/dlp/QuickStart.java index 941a5fb71e8..52946585907 100644 --- a/dlp/src/main/java/com/example/dlp/QuickStart.java +++ b/dlp/src/main/java/com/example/dlp/QuickStart.java @@ -16,15 +16,19 @@ package com.example.dlp; -import com.google.cloud.dlp.v2beta1.DlpServiceClient; -import com.google.privacy.dlp.v2beta1.ContentItem; -import com.google.privacy.dlp.v2beta1.Finding; -import com.google.privacy.dlp.v2beta1.InfoType; -import com.google.privacy.dlp.v2beta1.InspectConfig; -import com.google.privacy.dlp.v2beta1.InspectContentRequest; -import com.google.privacy.dlp.v2beta1.InspectContentResponse; -import com.google.privacy.dlp.v2beta1.InspectResult; -import 
com.google.privacy.dlp.v2beta1.Likelihood; +import com.google.cloud.ServiceOptions; +import com.google.cloud.dlp.v2.DlpServiceClient; +import com.google.privacy.dlp.v2.ByteContentItem; +import com.google.privacy.dlp.v2.ContentItem; +import com.google.privacy.dlp.v2.Finding; +import com.google.privacy.dlp.v2.InfoType; +import com.google.privacy.dlp.v2.InspectConfig; +import com.google.privacy.dlp.v2.InspectContentRequest; +import com.google.privacy.dlp.v2.InspectContentResponse; +import com.google.privacy.dlp.v2.InspectResult; +import com.google.privacy.dlp.v2.Likelihood; +import com.google.privacy.dlp.v2.ProjectName; +import com.google.protobuf.ByteString; import java.util.Arrays; import java.util.List; @@ -35,11 +39,11 @@ public class QuickStart { public static void main(String[] args) throws Exception { // string to inspect - String text = "Robert Frost"; + String text = "His name was Robert Frost"; // The minimum likelihood required before returning a match: // LIKELIHOOD_UNSPECIFIED, VERY_UNLIKELY, UNLIKELY, POSSIBLE, LIKELY, VERY_LIKELY, UNRECOGNIZED - Likelihood minLikelihood = Likelihood.VERY_LIKELY; + Likelihood minLikelihood = Likelihood.POSSIBLE; // The maximum number of findings to report (0 = server maximum) int maxFindings = 0; @@ -47,8 +51,8 @@ public static void main(String[] args) throws Exception { // The infoTypes of information to match List infoTypes = Arrays.asList( - InfoType.newBuilder().setName("US_MALE_NAME").build(), - InfoType.newBuilder().setName("US_FEMALE_NAME").build()); + InfoType.newBuilder().setName("PERSON_NAME").build(), + InfoType.newBuilder().setName("US_STATE").build()); // Whether to include the matching string boolean includeQuote = true; @@ -56,40 +60,47 @@ public static void main(String[] args) throws Exception { // instantiate a client try (DlpServiceClient dlpServiceClient = DlpServiceClient.create()) { + InspectConfig.FindingLimits findingLimits = + 
InspectConfig.FindingLimits.newBuilder().setMaxFindingsPerItem(maxFindings).build(); + InspectConfig inspectConfig = InspectConfig.newBuilder() .addAllInfoTypes(infoTypes) .setMinLikelihood(minLikelihood) - .setMaxFindings(maxFindings) + .setLimits(findingLimits) .setIncludeQuote(includeQuote) .build(); - ContentItem contentItem = - ContentItem.newBuilder().setType("text/plain").setValue(text).build(); + ByteContentItem byteContentItem = + ByteContentItem.newBuilder() + .setType(ByteContentItem.BytesType.TEXT_UTF8) + .setData(ByteString.copyFromUtf8(text)) + .build(); + ContentItem contentItem = ContentItem.newBuilder().setByteItem(byteContentItem).build(); + String projectId = ServiceOptions.getDefaultProjectId(); InspectContentRequest request = InspectContentRequest.newBuilder() + .setParent(ProjectName.of(projectId).toString()) .setInspectConfig(inspectConfig) - .addItems(contentItem) + .setItem(contentItem) .build(); // Inspect the text for info types InspectContentResponse response = dlpServiceClient.inspectContent(request); - // Print the response - for (InspectResult result : response.getResultsList()) { - if (result.getFindingsCount() > 0) { - System.out.println("Findings: "); - for (Finding finding : result.getFindingsList()) { - if (includeQuote) { - System.out.print("Quote: " + finding.getQuote()); - } - System.out.print("\tInfo type: " + finding.getInfoType().getName()); - System.out.println("\tLikelihood: " + finding.getLikelihood()); + InspectResult result = response.getResult(); + if (result.getFindingsCount() > 0) { + System.out.println("Findings: "); + for (Finding finding : result.getFindingsList()) { + if (includeQuote) { + System.out.print("\tQuote: " + finding.getQuote()); } - } else { - System.out.println("No findings."); + System.out.print("\tInfo type: " + finding.getInfoType().getName()); + System.out.println("\tLikelihood: " + finding.getLikelihood()); } + } else { + System.out.println("No findings."); } } catch (Exception e) { 
System.out.println("Error in inspectString: " + e.getMessage()); diff --git a/dlp/src/main/java/com/example/dlp/Redact.java b/dlp/src/main/java/com/example/dlp/Redact.java index 0e738ca36b5..1ffd49fc90c 100644 --- a/dlp/src/main/java/com/example/dlp/Redact.java +++ b/dlp/src/main/java/com/example/dlp/Redact.java @@ -16,176 +16,145 @@ package com.example.dlp; -import com.google.cloud.dlp.v2beta1.DlpServiceClient; -import com.google.privacy.dlp.v2beta1.ContentItem; -import com.google.privacy.dlp.v2beta1.InfoType; -import com.google.privacy.dlp.v2beta1.InspectConfig; -import com.google.privacy.dlp.v2beta1.Likelihood; -import com.google.privacy.dlp.v2beta1.RedactContentRequest; -import com.google.privacy.dlp.v2beta1.RedactContentRequest.ImageRedactionConfig; -import com.google.privacy.dlp.v2beta1.RedactContentRequest.ReplaceConfig; -import com.google.privacy.dlp.v2beta1.RedactContentResponse; +import com.google.cloud.ServiceOptions; +import com.google.cloud.dlp.v2.DlpServiceClient; +import com.google.privacy.dlp.v2.ByteContentItem; +import com.google.privacy.dlp.v2.InfoType; +import com.google.privacy.dlp.v2.InspectConfig; +import com.google.privacy.dlp.v2.Likelihood; +import com.google.privacy.dlp.v2.ProjectName; +import com.google.privacy.dlp.v2.RedactImageRequest; +import com.google.privacy.dlp.v2.RedactImageResponse; import com.google.protobuf.ByteString; import java.io.FileOutputStream; import java.net.URLConnection; import java.nio.file.Files; import java.nio.file.Paths; import java.util.ArrayList; -import java.util.Collections; import java.util.List; +import java.util.stream.Collectors; import javax.activation.MimetypesFileTypeMap; import org.apache.commons.cli.CommandLine; import org.apache.commons.cli.CommandLineParser; import org.apache.commons.cli.DefaultParser; import org.apache.commons.cli.HelpFormatter; import org.apache.commons.cli.Option; -import org.apache.commons.cli.OptionGroup; import org.apache.commons.cli.Options; import 
org.apache.commons.cli.ParseException; public class Redact { - private static void redactString( - String string, String replacement, Likelihood minLikelihood, List infoTypes) - throws Exception { - // [START dlp_redact_string] - // Instantiate the DLP client - try (DlpServiceClient dlpClient = DlpServiceClient.create()) { - // The minimum likelihood required before returning a match - // eg.minLikelihood = LIKELIHOOD_VERY_LIKELY; - InspectConfig inspectConfig = - InspectConfig.newBuilder() - .addAllInfoTypes(infoTypes) - .setMinLikelihood(minLikelihood) - .build(); - - ContentItem contentItem = - ContentItem.newBuilder() - .setType("text/plain") - .setData(ByteString.copyFrom(string.getBytes())) - .build(); - - List replaceConfigs = new ArrayList<>(); - - if (infoTypes.isEmpty()) { - // replace all detected sensitive elements with replacement string - replaceConfigs.add(ReplaceConfig.newBuilder().setReplaceWith(replacement).build()); - } else { - // Replace select info types with chosen replacement string - for (InfoType infoType : infoTypes) { - replaceConfigs.add( - ReplaceConfig.newBuilder().setInfoType(infoType).setReplaceWith(replacement).build()); - } - } - - RedactContentRequest request = RedactContentRequest.newBuilder() - .setInspectConfig(inspectConfig) - .addAllItems(Collections.singletonList(contentItem)) - .addAllReplaceConfigs(replaceConfigs) - .build(); - - RedactContentResponse contentResponse = dlpClient.redactContent(request); - for (ContentItem responseItem : contentResponse.getItemsList()) { - // print out string with redacted content - System.out.println(responseItem.getData().toStringUtf8()); - } - } - // [END dlp_redact_string] - } - + // [START dlp_redact_image] + /* + * Redact sensitive data from an image using the Data Loss Prevention API. + * + * @param filePath The path to a local file to inspect. Can be a JPG or PNG image file. + * @param minLikelihood The minimum likelihood required before redacting a match. 
+ * @param infoTypes The infoTypes of information to redact. + * @param outputPath The local path to save the resulting image to. + * @param projectId The project ID to run the API call under. + */ private static void redactImage( - String filePath, Likelihood minLikelihood, List infoTypes, String outputPath) + String filePath, + Likelihood minLikelihood, + List infoTypes, + String outputPath, + String projectId) throws Exception { - // [START dlp_redact_image] + // Instantiate the DLP client try (DlpServiceClient dlpClient = DlpServiceClient.create()) { - // The path to a local file to inspect. Can be a JPG or PNG image file. - // filePath = 'path/to/image.png' - // detect file mime type, default to application/octet-stream String mimeType = URLConnection.guessContentTypeFromName(filePath); if (mimeType == null) { mimeType = MimetypesFileTypeMap.getDefaultFileTypeMap().getContentType(filePath); } - if (mimeType == null) { - mimeType = "application/octet-stream"; + + ByteContentItem.BytesType bytesType; + switch (mimeType) { + case "image/jpeg": + bytesType = ByteContentItem.BytesType.IMAGE_JPEG; + break; + case "image/bmp": + bytesType = ByteContentItem.BytesType.IMAGE_BMP; + break; + case "image/png": + bytesType = ByteContentItem.BytesType.IMAGE_PNG; + break; + case "image/svg": + bytesType = ByteContentItem.BytesType.IMAGE_SVG; + break; + default: + bytesType = ByteContentItem.BytesType.BYTES_TYPE_UNSPECIFIED; + break; } byte[] data = Files.readAllBytes(Paths.get(filePath)); - // The minimum likelihood required before redacting a match - // minLikelihood = 'LIKELIHOOD_UNSPECIFIED' - - // The infoTypes of information to redact - // infoTypes = [{ name: 'EMAIL_ADDRESS' }, { name: 'PHONE_NUMBER' }] - - // The local path to save the resulting image to. 
- // outputPath = 'result.png' - InspectConfig inspectConfig = InspectConfig.newBuilder() .addAllInfoTypes(infoTypes) .setMinLikelihood(minLikelihood) .build(); - ContentItem contentItem = - ContentItem.newBuilder().setType(mimeType).setData(ByteString.copyFrom(data)).build(); - - List imageRedactionConfigs = new ArrayList<>(); - for (InfoType infoType : infoTypes) { - // clear the specific info type if detected in the image - // use .setRedactionColor to color detected info type without clearing - ImageRedactionConfig imageRedactionConfig = - ImageRedactionConfig.newBuilder().setInfoType(infoType).clearTarget().build(); - imageRedactionConfigs.add(imageRedactionConfig); - } - RedactContentRequest redactContentRequest = - RedactContentRequest.newBuilder() - .setInspectConfig(inspectConfig) + + ByteContentItem byteContentItem = + ByteContentItem.newBuilder() + .setType(bytesType) + .setData(ByteString.copyFrom(data)) + .build(); + + List imageRedactionConfigs = + infoTypes + .stream() + .map( + infoType -> + RedactImageRequest.ImageRedactionConfig.newBuilder() + .setInfoType(infoType) + .build()) + .collect(Collectors.toList()); + + RedactImageRequest redactImageRequest = + RedactImageRequest.newBuilder() + .setParent(ProjectName.of(projectId).toString()) .addAllImageRedactionConfigs(imageRedactionConfigs) - .addItems(contentItem) + .setByteItem(byteContentItem) + .setInspectConfig(inspectConfig) .build(); - RedactContentResponse contentResponse = dlpClient.redactContent(redactContentRequest); - for (ContentItem responseItem : contentResponse.getItemsList()) { - // redacted image data - ByteString redactedImageData = responseItem.getData(); - FileOutputStream outputStream = new FileOutputStream(outputPath); - outputStream.write(redactedImageData.toByteArray()); - outputStream.close(); - } - // [END dlp_redact_image] + RedactImageResponse redactImageResponse = dlpClient.redactImage(redactImageRequest); + + // redacted image data + ByteString redactedImageData = 
redactImageResponse.getRedactedImage(); + FileOutputStream outputStream = new FileOutputStream(outputPath); + outputStream.write(redactedImageData.toByteArray()); + outputStream.close(); } } + // [END dlp_redact_image] /** Command line application to redact strings, images using the Data Loss Prevention API. */ public static void main(String[] args) throws Exception { - OptionGroup optionsGroup = new OptionGroup(); - optionsGroup.setRequired(true); - Option stringOption = new Option("s", "string", true, "redact string"); - optionsGroup.addOption(stringOption); - - Option fileOption = new Option("f", "file path", true, "redact input file path"); - optionsGroup.addOption(fileOption); Options commandLineOptions = new Options(); - commandLineOptions.addOptionGroup(optionsGroup); Option minLikelihoodOption = Option.builder("minLikelihood").hasArg(true).required(false).build(); commandLineOptions.addOption(minLikelihoodOption); - Option replaceOption = - Option.builder("r").longOpt("replace string").hasArg(true).required(false).build(); - commandLineOptions.addOption(replaceOption); - Option infoTypesOption = Option.builder("infoTypes").hasArg(true).required(false).build(); infoTypesOption.setArgs(Option.UNLIMITED_VALUES); commandLineOptions.addOption(infoTypesOption); + Option inputFilePathOption = + Option.builder("f").hasArg(true).longOpt("inputFilePath").required(false).build(); + commandLineOptions.addOption(inputFilePathOption); + Option outputFilePathOption = Option.builder("o").hasArg(true).longOpt("outputFilePath").required(false).build(); + commandLineOptions.addOption(outputFilePathOption); + Option projectIdOption = Option.builder("projectId").hasArg(true).required(false).build(); CommandLineParser parser = new DefaultParser(); HelpFormatter formatter = new HelpFormatter(); CommandLine cmd; @@ -199,8 +168,6 @@ public static void main(String[] args) throws Exception { return; } - String replacement = cmd.getOptionValue(replaceOption.getOpt(), "_REDACTED_"); - 
List infoTypesList = new ArrayList<>(); String[] infoTypes = cmd.getOptionValues(infoTypesOption.getOpt()); if (infoTypes != null) { @@ -213,14 +180,10 @@ public static void main(String[] args) throws Exception { cmd.getOptionValue( minLikelihoodOption.getOpt(), Likelihood.LIKELIHOOD_UNSPECIFIED.name())); - // string inspection - if (cmd.hasOption("s")) { - String source = cmd.getOptionValue(stringOption.getOpt()); - redactString(source, replacement, minLikelihood, infoTypesList); - } else if (cmd.hasOption("f")) { - String filePath = cmd.getOptionValue(fileOption.getOpt()); - String outputFilePath = cmd.getOptionValue(outputFilePathOption.getOpt()); - redactImage(filePath, minLikelihood, infoTypesList, outputFilePath); - } + String inputFilePath = cmd.getOptionValue(inputFilePathOption.getOpt()); + String outputFilePath = cmd.getOptionValue(outputFilePathOption.getOpt()); + String projectId = + cmd.getOptionValue(projectIdOption.getOpt(), ServiceOptions.getDefaultProjectId()); + redactImage(inputFilePath, minLikelihood, infoTypesList, outputFilePath, projectId); } } diff --git a/dlp/src/main/java/com/example/dlp/RiskAnalysis.java b/dlp/src/main/java/com/example/dlp/RiskAnalysis.java index 853b902c555..161129d311b 100644 --- a/dlp/src/main/java/com/example/dlp/RiskAnalysis.java +++ b/dlp/src/main/java/com/example/dlp/RiskAnalysis.java @@ -16,30 +16,49 @@ package com.example.dlp; -import com.google.api.gax.longrunning.OperationFuture; +import com.google.api.core.SettableApiFuture; import com.google.cloud.ServiceOptions; -import com.google.cloud.dlp.v2beta1.DlpServiceClient; -import com.google.longrunning.Operation; -import com.google.privacy.dlp.v2beta1.AnalyzeDataSourceRiskRequest; -import com.google.privacy.dlp.v2beta1.BigQueryTable; -import com.google.privacy.dlp.v2beta1.FieldId; -import com.google.privacy.dlp.v2beta1.PrivacyMetric; -import com.google.privacy.dlp.v2beta1.PrivacyMetric.CategoricalStatsConfig; -import 
com.google.privacy.dlp.v2beta1.PrivacyMetric.KAnonymityConfig; -import com.google.privacy.dlp.v2beta1.PrivacyMetric.LDiversityConfig; -import com.google.privacy.dlp.v2beta1.PrivacyMetric.NumericalStatsConfig; -import com.google.privacy.dlp.v2beta1.RiskAnalysisOperationMetadata; -import com.google.privacy.dlp.v2beta1.RiskAnalysisOperationResult; -import com.google.privacy.dlp.v2beta1.RiskAnalysisOperationResult.CategoricalStatsResult.CategoricalStatsHistogramBucket; -import com.google.privacy.dlp.v2beta1.RiskAnalysisOperationResult.KAnonymityResult.KAnonymityEquivalenceClass; -import com.google.privacy.dlp.v2beta1.RiskAnalysisOperationResult.KAnonymityResult.KAnonymityHistogramBucket; -import com.google.privacy.dlp.v2beta1.RiskAnalysisOperationResult.LDiversityResult.LDiversityEquivalenceClass; -import com.google.privacy.dlp.v2beta1.RiskAnalysisOperationResult.LDiversityResult.LDiversityHistogramBucket; -import com.google.privacy.dlp.v2beta1.RiskAnalysisOperationResult.NumericalStatsResult; -import com.google.privacy.dlp.v2beta1.Value; -import com.google.privacy.dlp.v2beta1.ValueFrequency; +import com.google.cloud.dlp.v2.DlpServiceClient; +import com.google.cloud.pubsub.v1.Subscriber; +import com.google.privacy.dlp.v2.Action; +import com.google.privacy.dlp.v2.Action.PublishToPubSub; +import com.google.privacy.dlp.v2.AnalyzeDataSourceRiskDetails; +import com.google.privacy.dlp.v2.AnalyzeDataSourceRiskDetails.CategoricalStatsResult.CategoricalStatsHistogramBucket; +import com.google.privacy.dlp.v2.AnalyzeDataSourceRiskDetails.KAnonymityResult; +import com.google.privacy.dlp.v2.AnalyzeDataSourceRiskDetails.KAnonymityResult.KAnonymityEquivalenceClass; +import com.google.privacy.dlp.v2.AnalyzeDataSourceRiskDetails.KAnonymityResult.KAnonymityHistogramBucket; +import com.google.privacy.dlp.v2.AnalyzeDataSourceRiskDetails.KMapEstimationResult; +import com.google.privacy.dlp.v2.AnalyzeDataSourceRiskDetails.KMapEstimationResult.KMapEstimationHistogramBucket; +import 
com.google.privacy.dlp.v2.AnalyzeDataSourceRiskDetails.KMapEstimationResult.KMapEstimationQuasiIdValues; +import com.google.privacy.dlp.v2.AnalyzeDataSourceRiskDetails.LDiversityResult; +import com.google.privacy.dlp.v2.AnalyzeDataSourceRiskDetails.LDiversityResult.LDiversityEquivalenceClass; +import com.google.privacy.dlp.v2.AnalyzeDataSourceRiskDetails.LDiversityResult.LDiversityHistogramBucket; +import com.google.privacy.dlp.v2.BigQueryTable; +import com.google.privacy.dlp.v2.CreateDlpJobRequest; +import com.google.privacy.dlp.v2.DlpJob; +import com.google.privacy.dlp.v2.FieldId; +import com.google.privacy.dlp.v2.GetDlpJobRequest; +import com.google.privacy.dlp.v2.InfoType; +import com.google.privacy.dlp.v2.PrivacyMetric; +import com.google.privacy.dlp.v2.PrivacyMetric.CategoricalStatsConfig; +import com.google.privacy.dlp.v2.PrivacyMetric.KAnonymityConfig; +import com.google.privacy.dlp.v2.PrivacyMetric.KMapEstimationConfig; +import com.google.privacy.dlp.v2.PrivacyMetric.KMapEstimationConfig.TaggedField; +import com.google.privacy.dlp.v2.PrivacyMetric.LDiversityConfig; +import com.google.privacy.dlp.v2.PrivacyMetric.NumericalStatsConfig; +import com.google.privacy.dlp.v2.ProjectName; +import com.google.privacy.dlp.v2.RiskAnalysisJobConfig; +import com.google.privacy.dlp.v2.Value; +import com.google.privacy.dlp.v2.ValueFrequency; +import com.google.pubsub.v1.ProjectSubscriptionName; +import com.google.pubsub.v1.ProjectTopicName; +import java.util.ArrayList; import java.util.Arrays; +import java.util.Collections; +import java.util.Iterator; import java.util.List; +import java.util.concurrent.TimeUnit; +import java.util.concurrent.TimeoutException; import java.util.stream.Collectors; import org.apache.commons.cli.CommandLine; import org.apache.commons.cli.CommandLineParser; @@ -52,114 +71,151 @@ public class RiskAnalysis { - private static void calculateNumericalStats( - String projectId, String datasetId, String tableId, String columnName) + // [START 
dlp_numerical_stats] + /** + * Calculate numerical statistics for a column in a BigQuery table using the DLP API. + * + * @param projectId The Google Cloud Platform project ID to run the API call under. + * @param datasetId The BigQuery dataset to analyze. + * @param tableId The BigQuery table to analyze. + * @param columnName The name of the column to analyze, which must contain only numerical data. + * @param topicId The name of the Pub/Sub topic to notify once the job completes + * @param subscriptionId The name of the Pub/Sub subscription to use when listening for job + * completion status. + */ + private static void numericalStatsAnalysis( + String projectId, + String datasetId, + String tableId, + String columnName, + String topicId, + String subscriptionId) throws Exception { - // [START dlp_numerical_stats] - /** - * Calculate numerical statistics for a column in a BigQuery table using the DLP API. - * @param projectId The Google Cloud Platform project ID to run the API call under. - * @param datasetId The BigQuery dataset to analyze. - * @param tableId The BigQuery table to analyze. - * @param columnName The name of the column to analyze, which must contain only numerical data. 
- */ - - // instantiate a client + // Instantiates a client try (DlpServiceClient dlpServiceClient = DlpServiceClient.create()) { - - // projectId = process.env.GCLOUD_PROJECT; - // datasetId = "my_dataset"; - // tableId = "my_table"; - // columnName = "firstName"; - - FieldId fieldId = - FieldId.newBuilder() - .setColumnName(columnName) - .build(); - - NumericalStatsConfig numericalStatsConfig = - NumericalStatsConfig.newBuilder() - .setField(fieldId) - .build(); - BigQueryTable bigQueryTable = BigQueryTable.newBuilder() - .setProjectId(projectId) - .setDatasetId(datasetId) .setTableId(tableId) + .setDatasetId(datasetId) + .setProjectId(projectId) .build(); + FieldId fieldId = FieldId.newBuilder().setName(columnName).build(); + + NumericalStatsConfig numericalStatsConfig = + NumericalStatsConfig.newBuilder().setField(fieldId).build(); + PrivacyMetric privacyMetric = - PrivacyMetric.newBuilder() - .setNumericalStatsConfig(numericalStatsConfig) - .build(); + PrivacyMetric.newBuilder().setNumericalStatsConfig(numericalStatsConfig).build(); - AnalyzeDataSourceRiskRequest request = - AnalyzeDataSourceRiskRequest.newBuilder() - .setPrivacyMetric(privacyMetric) - .setSourceTable(bigQueryTable) - .build(); + String topicName = String.format("projects/%s/topics/%s", projectId, topicId); - // asynchronously submit a risk analysis operation - OperationFuture - responseFuture = dlpServiceClient.analyzeDataSourceRiskAsync(request); + PublishToPubSub publishToPubSub = PublishToPubSub.newBuilder().setTopic(topicName).build(); - // ... 
- // block on response - RiskAnalysisOperationResult response = responseFuture.get(); - NumericalStatsResult results = - response.getNumericalStatsResult(); + // Create action to publish job status notifications over Google Cloud Pub/Sub + Action action = Action.newBuilder().setPubSub(publishToPubSub).build(); + + RiskAnalysisJobConfig riskAnalysisJobConfig = + RiskAnalysisJobConfig.newBuilder() + .setSourceTable(bigQueryTable) + .setPrivacyMetric(privacyMetric) + .addActions(action) + .build(); - System.out.println( - "Value range: [" + results.getMaxValue() + ", " + results.getMinValue() + "]"); + CreateDlpJobRequest createDlpJobRequest = + CreateDlpJobRequest.newBuilder() + .setParent(ProjectName.of(projectId).toString()) + .setRiskJob(riskAnalysisJobConfig) + .build(); - // Print out unique quantiles - String previousValue = ""; - for (int i = 0; i < results.getQuantileValuesCount(); i++) { - Value valueObj = results.getQuantileValues(i); - String value = valueObj.toString(); + DlpJob dlpJob = dlpServiceClient.createDlpJob(createDlpJobRequest); + String dlpJobName = dlpJob.getName(); + + final SettableApiFuture done = SettableApiFuture.create(); + + // Set up a Pub/Sub subscriber to listen on the job completion status + Subscriber subscriber = + Subscriber.newBuilder( + ProjectSubscriptionName.newBuilder() + .setProject(projectId) + .setSubscription(subscriptionId) + .build(), + (pubsubMessage, ackReplyConsumer) -> { + if (pubsubMessage.getAttributesCount() > 0 + && pubsubMessage.getAttributesMap().get("DlpJobName").equals(dlpJobName)) { + // notify job completion + done.set(true); + ackReplyConsumer.ack(); + } + }) + .build(); + subscriber.startAsync(); + + // Wait for job completion semi-synchronously + // For long jobs, consider using a truly asynchronous execution model such as Cloud Functions + try { + done.get(1, TimeUnit.MINUTES); + Thread.sleep(500); // Wait for the job to become available + } catch (TimeoutException e) { + System.out.println("Unable to 
verify job completion."); + } - if (!previousValue.equals(value)) { - System.out.println("Value at " + i + "% quantile: " + value.toString()); - previousValue = value; + // Retrieve completed job status + DlpJob completedJob = + dlpServiceClient.getDlpJob(GetDlpJobRequest.newBuilder().setName(dlpJobName).build()); + + System.out.println("Job status: " + completedJob.getState()); + AnalyzeDataSourceRiskDetails riskDetails = completedJob.getRiskDetails(); + AnalyzeDataSourceRiskDetails.NumericalStatsResult result = + riskDetails.getNumericalStatsResult(); + + System.out.printf( + "Value range : [%.3f, %.3f]\n", + result.getMinValue().getFloatValue(), result.getMaxValue().getFloatValue()); + + int percent = 1; + Double lastValue = null; + for (Value quantileValue : result.getQuantileValuesList()) { + Double currentValue = quantileValue.getFloatValue(); + if (lastValue == null || !lastValue.equals(currentValue)) { + System.out.printf("Value at %s %% quantile : %.3f", percent, currentValue); } + lastValue = currentValue; } } catch (Exception e) { - System.out.println("Error in numericalStatsAnalysis: " + e.getMessage()); + System.out.println("Error in categoricalStatsAnalysis: " + e.getMessage()); } - // [END dlp_numerical_stats] } + // [END dlp_numerical_stats] - private static void calculateCategoricalStats( - String projectId, String datasetId, String tableId, String columnName) - throws Exception { - // [START dlp_categorical_stats] - /** - * Calculate categorical statistics for a column in a BigQuery table using the DLP API. - * @param projectId The Google Cloud Platform project ID to run the API call under. - * @param datasetId The BigQuery dataset to analyze. - * @param tableId The BigQuery table to analyze. - * @param columnName The name of the column to analyze, which need not contain numerical data. 
- */ - - // instantiate a client - try (DlpServiceClient dlpServiceClient = DlpServiceClient.create()) { + // [START dlp_categorical_stats] + /** + * Calculate categorical statistics for a column in a BigQuery table using the DLP API. + * + * @param projectId The Google Cloud Platform project ID to run the API call under. + * @param datasetId The BigQuery dataset to analyze. + * @param tableId The BigQuery table to analyze. + * @param columnName The name of the column to analyze, which need not contain numerical data. + * @param topicId The name of the Pub/Sub topic to notify once the job completes + * @param subscriptionId The name of the Pub/Sub subscription to use when listening for job + * completion status. + */ + private static void categoricalStatsAnalysis( + String projectId, + String datasetId, + String tableId, + String columnName, + String topicId, + String subscriptionId) { - // projectId = process.env.GCLOUD_PROJECT; - // datasetId = "my_dataset"; - // tableId = "my_table"; - // columnName = "firstName"; + // Instantiates a client + try (DlpServiceClient dlpServiceClient = DlpServiceClient.create()) { - FieldId fieldId = - FieldId.newBuilder() - .setColumnName(columnName) - .build(); + FieldId fieldId = FieldId.newBuilder().setName(columnName).build(); CategoricalStatsConfig categoricalStatsConfig = - CategoricalStatsConfig.newBuilder() - .setField(fieldId) - .build(); + CategoricalStatsConfig.newBuilder().setField(fieldId).build(); BigQueryTable bigQueryTable = BigQueryTable.newBuilder() @@ -169,76 +225,119 @@ private static void calculateCategoricalStats( .build(); PrivacyMetric privacyMetric = - PrivacyMetric.newBuilder() - .setCategoricalStatsConfig(categoricalStatsConfig) - .build(); + PrivacyMetric.newBuilder().setCategoricalStatsConfig(categoricalStatsConfig).build(); - AnalyzeDataSourceRiskRequest request = - AnalyzeDataSourceRiskRequest.newBuilder() - .setPrivacyMetric(privacyMetric) + ProjectTopicName topicName = 
ProjectTopicName.of(projectId, topicId); + + PublishToPubSub publishToPubSub = + PublishToPubSub.newBuilder().setTopic(topicName.toString()).build(); + + // Create action to publish job status notifications over Google Cloud Pub/Sub + Action action = Action.newBuilder().setPubSub(publishToPubSub).build(); + + RiskAnalysisJobConfig riskAnalysisJobConfig = + RiskAnalysisJobConfig.newBuilder() .setSourceTable(bigQueryTable) + .setPrivacyMetric(privacyMetric) + .addActions(action) .build(); - // asynchronously submit a risk analysis operation - OperationFuture - responseFuture = dlpServiceClient.analyzeDataSourceRiskAsync(request); - - // ... - // block on response - RiskAnalysisOperationResult response = responseFuture.get(); - CategoricalStatsHistogramBucket results = - response.getCategoricalStatsResult().getValueFrequencyHistogramBuckets(0); - - System.out.println( - "Most common value occurs " + results.getValueFrequencyUpperBound() + " time(s)"); - System.out.println( - "Least common value occurs " + results.getValueFrequencyLowerBound() + " time(s)"); - - for (ValueFrequency valueFrequency : results.getBucketValuesList()) { - System.out.println("Value " - + valueFrequency.getValue().toString() - + " occurs " - + valueFrequency.getCount() - + " time(s)." 
- ); + CreateDlpJobRequest createDlpJobRequest = + CreateDlpJobRequest.newBuilder() + .setParent(ProjectName.of(projectId).toString()) + .setRiskJob(riskAnalysisJobConfig) + .build(); + + DlpJob dlpJob = dlpServiceClient.createDlpJob(createDlpJobRequest); + String dlpJobName = dlpJob.getName(); + + final SettableApiFuture done = SettableApiFuture.create(); + + // Set up a Pub/Sub subscriber to listen on the job completion status + Subscriber subscriber = + Subscriber.newBuilder( + ProjectSubscriptionName.newBuilder() + .setProject(projectId) + .setSubscription(subscriptionId) + .build(), + (pubsubMessage, ackReplyConsumer) -> { + if (pubsubMessage.getAttributesCount() > 0 + && pubsubMessage.getAttributesMap().get("DlpJobName").equals(dlpJobName)) { + // notify job completion + done.set(true); + ackReplyConsumer.ack(); + } + }) + .build(); + subscriber.startAsync(); + + // Wait for job completion semi-synchronously + // For long jobs, consider using a truly asynchronous execution model such as Cloud Functions + try { + done.get(1, TimeUnit.MINUTES); + Thread.sleep(500); // Wait for the job to become available + } catch (TimeoutException e) { + System.out.println("Unable to verify job completion."); } + // Retrieve completed job status + DlpJob completedJob = + dlpServiceClient.getDlpJob(GetDlpJobRequest.newBuilder().setName(dlpJobName).build()); + + System.out.println("Job status: " + completedJob.getState()); + AnalyzeDataSourceRiskDetails riskDetails = completedJob.getRiskDetails(); + AnalyzeDataSourceRiskDetails.CategoricalStatsResult result = + riskDetails.getCategoricalStatsResult(); + + for (CategoricalStatsHistogramBucket bucket : + result.getValueFrequencyHistogramBucketsList()) { + System.out.printf( + "Most common value occurs %d time(s).\n", bucket.getValueFrequencyUpperBound()); + System.out.printf( + "Least common value occurs %d time(s).\n", bucket.getValueFrequencyLowerBound()); + for (ValueFrequency valueFrequency : bucket.getBucketValuesList()) { + 
System.out.printf( + "Value %s occurs %d time(s).\n", + valueFrequency.getValue().toString(), valueFrequency.getCount()); + } + } } catch (Exception e) { System.out.println("Error in categoricalStatsAnalysis: " + e.getMessage()); } - // [END dlp_categorical_stats] } + // [END dlp_categorical_stats] + // [START dlp_k_anonymity] + /** + * Calculate k-anonymity for quasi-identifiers in a BigQuery table using the DLP API. + * + * @param projectId The Google Cloud Platform project ID to run the API call under. + * @param datasetId The BigQuery dataset to analyze. + * @param tableId The BigQuery table to analyze. + * @param quasiIds The names of columns that form a composite key ('quasi-identifiers'). + * @param topicId The name of the Pub/Sub topic to notify once the job completes + * @param subscriptionId The name of the Pub/Sub subscription to use when listening for job + * completion status. + */ private static void calculateKAnonymity( - String projectId, String datasetId, String tableId, List quasiIds) + String projectId, + String datasetId, + String tableId, + List quasiIds, + String topicId, + String subscriptionId) throws Exception { - // [START dlp_k_anonymity] - /** - * Calculate k-anonymity for quasi-identifiers in a BigQuery table using the DLP API. - * @param projectId The Google Cloud Platform project ID to run the API call under. - * @param datasetId The BigQuery dataset to analyze. - * @param tableId The BigQuery table to analyze. - * @param quasiIds The names of columns that form a composite key ('quasi-identifiers'). 
- */ - - // instantiate a client + // Instantiates a client try (DlpServiceClient dlpServiceClient = DlpServiceClient.create()) { - // projectId = process.env.GCLOUD_PROJECT; - // datasetId = 'my_dataset'; - // tableId = 'my_table'; - // quasiIds = [{ columnName: 'age' }, { columnName: 'city' }]; - List quasiIdFields = quasiIds .stream() - .map(columnName -> FieldId.newBuilder().setColumnName(columnName).build()) + .map(columnName -> FieldId.newBuilder().setName(columnName).build()) .collect(Collectors.toList()); KAnonymityConfig kanonymityConfig = - KAnonymityConfig.newBuilder() - .addAllQuasiIds(quasiIdFields) - .build(); + KAnonymityConfig.newBuilder().addAllQuasiIds(quasiIdFields).build(); BigQueryTable bigQueryTable = BigQueryTable.newBuilder() @@ -248,83 +347,124 @@ private static void calculateKAnonymity( .build(); PrivacyMetric privacyMetric = - PrivacyMetric.newBuilder() - .setKAnonymityConfig(kanonymityConfig) - .build(); + PrivacyMetric.newBuilder().setKAnonymityConfig(kanonymityConfig).build(); - AnalyzeDataSourceRiskRequest request = - AnalyzeDataSourceRiskRequest.newBuilder() - .setPrivacyMetric(privacyMetric) + String topicName = String.format("projects/%s/topics/%s", projectId, topicId); + + PublishToPubSub publishToPubSub = PublishToPubSub.newBuilder().setTopic(topicName).build(); + + // Create action to publish job status notifications over Google Cloud Pub/Sub + Action action = Action.newBuilder().setPubSub(publishToPubSub).build(); + + RiskAnalysisJobConfig riskAnalysisJobConfig = + RiskAnalysisJobConfig.newBuilder() .setSourceTable(bigQueryTable) + .setPrivacyMetric(privacyMetric) + .addActions(action) + .build(); + + CreateDlpJobRequest createDlpJobRequest = + CreateDlpJobRequest.newBuilder() + .setParent(ProjectName.of(projectId).toString()) + .setRiskJob(riskAnalysisJobConfig) .build(); - // asynchronously submit a risk analysis operation - OperationFuture - responseFuture = dlpServiceClient.analyzeDataSourceRiskAsync(request); - - // ... 
- // block on response - RiskAnalysisOperationResult response = responseFuture.get(); - KAnonymityHistogramBucket results = - response.getKAnonymityResult().getEquivalenceClassHistogramBuckets(0); - - System.out.println("Bucket size range: [" - + results.getEquivalenceClassSizeLowerBound() - + ", " - + results.getEquivalenceClassSizeUpperBound() - + "]" - ); - - for (KAnonymityEquivalenceClass bucket : results.getBucketValuesList()) { - List quasiIdValues = bucket.getQuasiIdsValuesList() - .stream() - .map(v -> v.toString()) - .collect(Collectors.toList()); - - System.out.println("\tQuasi-ID values: " + String.join(", ", quasiIdValues)); - System.out.println("\tClass size: " + bucket.getEquivalenceClassSize()); + DlpJob dlpJob = dlpServiceClient.createDlpJob(createDlpJobRequest); + String dlpJobName = dlpJob.getName(); + + final SettableApiFuture done = SettableApiFuture.create(); + + // Set up a Pub/Sub subscriber to listen on the job completion status + Subscriber subscriber = + Subscriber.newBuilder( + ProjectSubscriptionName.newBuilder() + .setProject(projectId) + .setSubscription(subscriptionId) + .build(), + (pubsubMessage, ackReplyConsumer) -> { + if (pubsubMessage.getAttributesCount() > 0 + && pubsubMessage.getAttributesMap().get("DlpJobName").equals(dlpJobName)) { + // notify job completion + done.set(true); + ackReplyConsumer.ack(); + } + }) + .build(); + subscriber.startAsync(); + + // Wait for job completion semi-synchronously + // For long jobs, consider using a truly asynchronous execution model such as Cloud Functions + try { + done.get(1, TimeUnit.MINUTES); + Thread.sleep(500); // Wait for the job to become available + } catch (TimeoutException e) { + System.out.println("Unable to verify job completion."); + } + + // Retrieve completed job status + DlpJob completedJob = + dlpServiceClient.getDlpJob(GetDlpJobRequest.newBuilder().setName(dlpJobName).build()); + + System.out.println("Job status: " + completedJob.getState()); + 
AnalyzeDataSourceRiskDetails riskDetails = completedJob.getRiskDetails(); + + KAnonymityResult kanonymityResult = riskDetails.getKAnonymityResult(); + for (KAnonymityHistogramBucket result : + kanonymityResult.getEquivalenceClassHistogramBucketsList()) { + System.out.printf( + "Bucket size range: [%d, %d]\n", + result.getEquivalenceClassSizeLowerBound(), result.getEquivalenceClassSizeUpperBound()); + + for (KAnonymityEquivalenceClass bucket : result.getBucketValuesList()) { + List quasiIdValues = + bucket + .getQuasiIdsValuesList() + .stream() + .map(v -> v.toString()) + .collect(Collectors.toList()); + + System.out.println("\tQuasi-ID values: " + String.join(", ", quasiIdValues)); + System.out.println("\tClass size: " + bucket.getEquivalenceClassSize()); + } } } catch (Exception e) { - System.out.println("Error in kAnonymityAnalysis: " + e.getMessage()); + System.out.println("Error in calculateKAnonymity: " + e.getMessage()); } - // [END dlp_k_anonymity] } + // [END dlp_k_anonymity] + // [START dlp_l_diversity] + /** + * Calculate l-diversity for an attribute relative to quasi-identifiers in a BigQuery table. + * + * @param projectId The Google Cloud Platform project ID to run the API call under. + * @param datasetId The BigQuery dataset to analyze. + * @param tableId The BigQuery table to analyze. + * @param sensitiveAttribute The name of the attribute to compare the quasi-ID against + * @param quasiIds A set of column names that form a composite key ('quasi-identifiers'). + * @param topicId The name of the Pub/Sub topic to notify once the job completes + * @param subscriptionId The name of the Pub/Sub subscription to use when listening for job + * completion status. + */ private static void calculateLDiversity( String projectId, String datasetId, String tableId, String sensitiveAttribute, - List quasiIds - ) throws Exception { - // [START dlp_l_diversity] - /** - * Calculate l-diversity for an attribute relative to quasi-identifiers in a BigQuery table. 
- * @param projectId The Google Cloud Platform project ID to run the API call under. - * @param datasetId The BigQuery dataset to analyze. - * @param tableId The BigQuery table to analyze. - * @param sensitiveAttribute The name of the attribute to compare the quasi-ID against - * @param quasiIds A set of column names that form a composite key ('quasi-identifiers'). - */ - - // instantiate a client + List quasiIds, + String topicId, + String subscriptionId) + throws Exception { + + // Instantiates a client try (DlpServiceClient dlpServiceClient = DlpServiceClient.create()) { - // projectId = process.env.GCLOUD_PROJECT; - // datasetId = "my_dataset"; - // tableId = "my_table"; - // sensitiveAttribute = "name"; - // quasiIds = [{ columnName: "age" }, { columnName: "city" }]; - - FieldId sensitiveAttributeField = - FieldId.newBuilder() - .setColumnName(sensitiveAttribute) - .build(); + FieldId sensitiveAttributeField = FieldId.newBuilder().setName(sensitiveAttribute).build(); List quasiIdFields = quasiIds .stream() - .map(columnName -> FieldId.newBuilder().setColumnName(columnName).build()) + .map(columnName -> FieldId.newBuilder().setName(columnName).build()) .collect(Collectors.toList()); LDiversityConfig ldiversityConfig = @@ -341,53 +481,248 @@ private static void calculateLDiversity( .build(); PrivacyMetric privacyMetric = - PrivacyMetric.newBuilder() - .setLDiversityConfig(ldiversityConfig) - .build(); + PrivacyMetric.newBuilder().setLDiversityConfig(ldiversityConfig).build(); - AnalyzeDataSourceRiskRequest request = - AnalyzeDataSourceRiskRequest.newBuilder() - .setPrivacyMetric(privacyMetric) + String topicName = String.format("projects/%s/topics/%s", projectId, topicId); + + PublishToPubSub publishToPubSub = PublishToPubSub.newBuilder().setTopic(topicName).build(); + + // Create action to publish job status notifications over Google Cloud Pub/Sub + Action action = Action.newBuilder().setPubSub(publishToPubSub).build(); + + RiskAnalysisJobConfig 
riskAnalysisJobConfig = + RiskAnalysisJobConfig.newBuilder() .setSourceTable(bigQueryTable) + .setPrivacyMetric(privacyMetric) + .addActions(action) + .build(); + + CreateDlpJobRequest createDlpJobRequest = + CreateDlpJobRequest.newBuilder() + .setParent(ProjectName.of(projectId).toString()) + .setRiskJob(riskAnalysisJobConfig) .build(); - // asynchronously submit a risk analysis operation - OperationFuture - responseFuture = dlpServiceClient.analyzeDataSourceRiskAsync(request); - - // ... - // block on response - RiskAnalysisOperationResult response = responseFuture.get(); - LDiversityHistogramBucket results = - response.getLDiversityResult().getSensitiveValueFrequencyHistogramBuckets(0); - - for (LDiversityEquivalenceClass bucket : results.getBucketValuesList()) { - List quasiIdValues = bucket.getQuasiIdsValuesList() - .stream() - .map(v -> v.toString()) - .collect(Collectors.toList()); - - System.out.println("\tQuasi-ID values: " + String.join(", ", quasiIdValues)); - System.out.println("\tClass size: " + bucket.getEquivalenceClassSize()); - - for (ValueFrequency valueFrequency : bucket.getTopSensitiveValuesList()) { - System.out.println("\t\tSensitive value " - + valueFrequency.getValue().toString() - + " occurs " - + valueFrequency.getCount() - + " time(s)."); + DlpJob dlpJob = dlpServiceClient.createDlpJob(createDlpJobRequest); + String dlpJobName = dlpJob.getName(); + + final SettableApiFuture done = SettableApiFuture.create(); + + // Set up a Pub/Sub subscriber to listen on the job completion status + Subscriber subscriber = + Subscriber.newBuilder( + ProjectSubscriptionName.newBuilder() + .setProject(projectId) + .setSubscription(subscriptionId) + .build(), + (pubsubMessage, ackReplyConsumer) -> { + if (pubsubMessage.getAttributesCount() > 0 + && pubsubMessage.getAttributesMap().get("DlpJobName").equals(dlpJobName)) { + // notify job completion + done.set(true); + ackReplyConsumer.ack(); + } + }) + .build(); + subscriber.startAsync(); + + // Wait for job 
completion semi-synchronously + // For long jobs, consider using a truly asynchronous execution model such as Cloud Functions + try { + done.get(1, TimeUnit.MINUTES); + Thread.sleep(500); // Wait for the job to become available + } catch (TimeoutException e) { + System.out.println("Unable to verify job completion."); + } + + // retrieve completed job status + DlpJob completedJob = + dlpServiceClient.getDlpJob(GetDlpJobRequest.newBuilder().setName(dlpJobName).build()); + + System.out.println("Job status: " + completedJob.getState()); + AnalyzeDataSourceRiskDetails riskDetails = completedJob.getRiskDetails(); + + LDiversityResult ldiversityResult = riskDetails.getLDiversityResult(); + for (LDiversityHistogramBucket result : + ldiversityResult.getSensitiveValueFrequencyHistogramBucketsList()) { + for (LDiversityEquivalenceClass bucket : result.getBucketValuesList()) { + List quasiIdValues = + bucket + .getQuasiIdsValuesList() + .stream() + .map(Value::toString) + .collect(Collectors.toList()); + + System.out.println("\tQuasi-ID values: " + String.join(", ", quasiIdValues)); + System.out.println("\tClass size: " + bucket.getEquivalenceClassSize()); + + for (ValueFrequency valueFrequency : bucket.getTopSensitiveValuesList()) { + System.out.printf( + "\t\tSensitive value %s occurs %d time(s).\n", + valueFrequency.getValue().toString(), valueFrequency.getCount()); + } } } } catch (Exception e) { - System.out.println("Error in lDiversityAnalysis: " + e.getMessage()); + System.out.println("Error in calculateLDiversity: " + e.getMessage()); } - // [END dlp_l_diversity] } + // [END dlp_l_diversity] + + // [START dlp_k_map] + /** + * Calculate k-map risk estimation for an attribute relative to quasi-identifiers in a BigQuery + * table. + * + * @param projectId The Google Cloud Platform project ID to run the API call under. + * @param datasetId The BigQuery dataset to analyze. + * @param tableId The BigQuery table to analyze. 
+ * @param quasiIds A set of column names that form a composite key ('quasi-identifiers'). + * @param infoTypes The infoTypes corresponding to each quasi-id column + * @param regionCode An ISO-3166-1 region code specifying the k-map distribution region + * @param topicId The name of the Pub/Sub topic to notify once the job completes + * @param subscriptionId The name of the Pub/Sub subscription to use when listening for job + * completion status. + */ + private static void calculateKMap( + String projectId, + String datasetId, + String tableId, + List quasiIds, + List infoTypes, + String regionCode, + String topicId, + String subscriptionId) + throws Exception { + + // Instantiates a client + try (DlpServiceClient dlpServiceClient = DlpServiceClient.create()) { + + Iterator quasiIdsIterator = quasiIds.iterator(); + Iterator infoTypesIterator = infoTypes.iterator(); + + if (quasiIds.size() != infoTypes.size()) { + throw new IllegalArgumentException("The numbers of quasi-IDs and infoTypes must be equal!"); + } + + ArrayList taggedFields = new ArrayList(); + + while (quasiIdsIterator.hasNext() || infoTypesIterator.hasNext()) { + taggedFields.add( + TaggedField.newBuilder() + .setField(FieldId.newBuilder().setName(quasiIdsIterator.next()).build()) + .setInfoType(infoTypesIterator.next()) + .build()); + } + + KMapEstimationConfig kmapConfig = + KMapEstimationConfig.newBuilder() + .addAllQuasiIds(taggedFields) + .setRegionCode(regionCode) + .build(); + + BigQueryTable bigQueryTable = + BigQueryTable.newBuilder() + .setProjectId(projectId) + .setDatasetId(datasetId) + .setTableId(tableId) + .build(); + + PrivacyMetric privacyMetric = + PrivacyMetric.newBuilder().setKMapEstimationConfig(kmapConfig).build(); + + String topicName = String.format("projects/%s/topics/%s", projectId, topicId); + + PublishToPubSub publishToPubSub = PublishToPubSub.newBuilder().setTopic(topicName).build(); + + // Create action to publish job status notifications over Google Cloud Pub/Sub + Action 
action = Action.newBuilder().setPubSub(publishToPubSub).build(); + + RiskAnalysisJobConfig riskAnalysisJobConfig = + RiskAnalysisJobConfig.newBuilder() + .setSourceTable(bigQueryTable) + .setPrivacyMetric(privacyMetric) + .addActions(action) + .build(); + + CreateDlpJobRequest createDlpJobRequest = + CreateDlpJobRequest.newBuilder() + .setParent(ProjectName.of(projectId).toString()) + .setRiskJob(riskAnalysisJobConfig) + .build(); + + DlpJob dlpJob = dlpServiceClient.createDlpJob(createDlpJobRequest); + String dlpJobName = dlpJob.getName(); + + final SettableApiFuture done = SettableApiFuture.create(); + + // Set up a Pub/Sub subscriber to listen on the job completion status + Subscriber subscriber = + Subscriber.newBuilder( + ProjectSubscriptionName.newBuilder() + .setProject(projectId) + .setSubscription(subscriptionId) + .build(), + (pubsubMessage, ackReplyConsumer) -> { + if (pubsubMessage.getAttributesCount() > 0 + && pubsubMessage.getAttributesMap().get("DlpJobName").equals(dlpJobName)) { + // notify job completion + done.set(true); + ackReplyConsumer.ack(); + } + }) + .build(); + subscriber.startAsync(); + + // Wait for job completion semi-synchronously + // For long jobs, consider using a truly asynchronous execution model such as Cloud Functions + try { + done.get(1, TimeUnit.MINUTES); + Thread.sleep(500); // Wait for the job to become available + } catch (TimeoutException e) { + System.out.println("Unable to verify job completion."); + } + // retrieve completed job status + DlpJob completedJob = + dlpServiceClient.getDlpJob(GetDlpJobRequest.newBuilder().setName(dlpJobName).build()); + + System.out.println("Job status: " + completedJob.getState()); + AnalyzeDataSourceRiskDetails riskDetails = completedJob.getRiskDetails(); + + KMapEstimationResult kmapResult = riskDetails.getKMapEstimationResult(); + for (KMapEstimationHistogramBucket result : kmapResult.getKMapEstimationHistogramList()) { + + System.out.printf( + "\tAnonymity range: [%d, %d]\n", 
result.getMinAnonymity(), result.getMaxAnonymity()); + System.out.printf("\tSize: %d\n", result.getBucketSize()); + + for (KMapEstimationQuasiIdValues valueBucket : result.getBucketValuesList()) { + String quasiIdValues = + valueBucket + .getQuasiIdsValuesList() + .stream() + .map( + v -> { + String s = v.toString(); + return s.substring(s.indexOf(':') + 1).trim(); + }) + .collect(Collectors.joining(", ")); + + System.out.printf("\tValues: {%s}\n", quasiIdValues); + System.out.printf( + "\tEstimated k-map anonymity: %d\n", valueBucket.getEstimatedAnonymity()); + } + } + } catch (Exception e) { + System.out.println("Error in calculateKMap: " + e.getMessage()); + } + } + // [END dlp_k_map] /** - * Command line application to perform risk analysis using the Data Loss Prevention API. - * Supported data format: BigQuery tables + * Command line application to perform risk analysis using the Data Loss Prevention API. Supported + * data format: BigQuery tables */ public static void main(String[] args) throws Exception { @@ -400,9 +735,12 @@ public static void main(String[] args) throws Exception { Option categoricalAnalysisOption = new Option("c", "categorical"); optionsGroup.addOption(categoricalAnalysisOption); - Option kanonymityOption = new Option("k", "kAnonymity"); + Option kanonymityOption = new Option("a", "kAnonymity"); optionsGroup.addOption(kanonymityOption); + Option kmapOption = new Option("m", "kAnonymity"); + optionsGroup.addOption(kmapOption); + Option ldiversityOption = new Option("l", "lDiversity"); optionsGroup.addOption(ldiversityOption); @@ -418,18 +756,32 @@ public static void main(String[] args) throws Exception { Option projectIdOption = Option.builder("projectId").hasArg(true).required(false).build(); commandLineOptions.addOption(projectIdOption); - Option columnNameOption = - Option.builder("columnName").hasArg(true).required(false).build(); + Option topicIdOption = Option.builder("topicId").hasArg(true).required(false).build(); + 
commandLineOptions.addOption(topicIdOption); + + Option subscriptionIdOption = + Option.builder("subscriptionId").hasArg(true).required(false).build(); + commandLineOptions.addOption(subscriptionIdOption); + + Option columnNameOption = Option.builder("columnName").hasArg(true).required(false).build(); commandLineOptions.addOption(columnNameOption); Option sensitiveAttributeOption = Option.builder("sensitiveAttribute").hasArg(true).required(false).build(); commandLineOptions.addOption(sensitiveAttributeOption); + Option regionCodeOption = Option.builder("regionCode").hasArg(true).required(false).build(); + commandLineOptions.addOption(regionCodeOption); + Option quasiIdColumnNamesOption = Option.builder("quasiIdColumnNames").hasArg(true).required(false).build(); + quasiIdColumnNamesOption.setArgs(Option.UNLIMITED_VALUES); commandLineOptions.addOption(quasiIdColumnNamesOption); + Option infoTypesOption = Option.builder("infoTypes").hasArg(true).required(false).build(); + infoTypesOption.setArgs(Option.UNLIMITED_VALUES); + commandLineOptions.addOption(infoTypesOption); + CommandLineParser parser = new DefaultParser(); HelpFormatter formatter = new HelpFormatter(); CommandLine cmd; @@ -447,28 +799,62 @@ public static void main(String[] args) throws Exception { String tableId = cmd.getOptionValue(tableIdOption.getOpt()); // use default project id when project id is not specified String projectId = - cmd.getOptionValue( - projectIdOption.getOpt(), ServiceOptions.getDefaultProjectId()); + cmd.getOptionValue(projectIdOption.getOpt(), ServiceOptions.getDefaultProjectId()); + + String regionCode = cmd.getOptionValue(regionCodeOption.getOpt(), "US"); + + String topicId = cmd.getOptionValue(topicIdOption.getOpt()); + String subscriptionId = cmd.getOptionValue(subscriptionIdOption.getOpt()); + + List infoTypesList = Collections.emptyList(); + if (cmd.hasOption(infoTypesOption.getOpt())) { + infoTypesList = new ArrayList<>(); + String[] infoTypes = 
cmd.getOptionValues(infoTypesOption.getOpt()); + for (String infoType : infoTypes) { + infoTypesList.add(InfoType.newBuilder().setName(infoType).build()); + } + } if (cmd.hasOption("n")) { // numerical stats analysis String columnName = cmd.getOptionValue(columnNameOption.getOpt()); - calculateNumericalStats(projectId, datasetId, tableId, columnName); + numericalStatsAnalysis(projectId, datasetId, tableId, columnName, topicId, subscriptionId); } else if (cmd.hasOption("c")) { // categorical stats analysis String columnName = cmd.getOptionValue(columnNameOption.getOpt()); - calculateCategoricalStats(projectId, datasetId, tableId, columnName); - } else if (cmd.hasOption("k")) { + categoricalStatsAnalysis(projectId, datasetId, tableId, columnName, topicId, subscriptionId); + } else if (cmd.hasOption("a")) { // k-anonymity analysis List quasiIdColumnNames = Arrays.asList(cmd.getOptionValues(quasiIdColumnNamesOption.getOpt())); - calculateKAnonymity(projectId, datasetId, tableId, quasiIdColumnNames); + calculateKAnonymity( + projectId, datasetId, tableId, quasiIdColumnNames, topicId, subscriptionId); + } else if (cmd.hasOption("m")) { + // k-map analysis + List quasiIdColumnNames = + Arrays.asList(cmd.getOptionValues(quasiIdColumnNamesOption.getOpt())); + calculateKMap( + projectId, + datasetId, + tableId, + quasiIdColumnNames, + infoTypesList, + regionCode, + topicId, + subscriptionId); } else if (cmd.hasOption("l")) { // l-diversity analysis String sensitiveAttribute = cmd.getOptionValue(sensitiveAttributeOption.getOpt()); List quasiIdColumnNames = Arrays.asList(cmd.getOptionValues(quasiIdColumnNamesOption.getOpt())); - calculateLDiversity(projectId, datasetId, tableId, sensitiveAttribute, quasiIdColumnNames); + calculateLDiversity( + projectId, + datasetId, + tableId, + sensitiveAttribute, + quasiIdColumnNames, + topicId, + subscriptionId); } } } diff --git a/dlp/src/main/java/com/example/dlp/Templates.java b/dlp/src/main/java/com/example/dlp/Templates.java new file 
mode 100644 index 00000000000..da4c5eb88c6 --- /dev/null +++ b/dlp/src/main/java/com/example/dlp/Templates.java @@ -0,0 +1,263 @@ +/* + * Copyright 2018 Google Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package com.example.dlp; + +import com.google.cloud.ServiceOptions; +import com.google.cloud.dlp.v2.DlpServiceClient; +import com.google.cloud.dlp.v2.DlpServiceClient.ListInspectTemplatesPage; +import com.google.cloud.dlp.v2.DlpServiceClient.ListInspectTemplatesPagedResponse; +import com.google.privacy.dlp.v2.CreateInspectTemplateRequest; +import com.google.privacy.dlp.v2.DeleteInspectTemplateRequest; +import com.google.privacy.dlp.v2.InfoType; +import com.google.privacy.dlp.v2.InspectConfig; +import com.google.privacy.dlp.v2.InspectConfig.FindingLimits; +import com.google.privacy.dlp.v2.InspectTemplate; +import com.google.privacy.dlp.v2.Likelihood; +import com.google.privacy.dlp.v2.ListInspectTemplatesRequest; +import com.google.privacy.dlp.v2.ListInspectTemplatesResponse; +import com.google.privacy.dlp.v2.ProjectName; +import java.util.ArrayList; +import java.util.List; +import org.apache.commons.cli.CommandLine; +import org.apache.commons.cli.CommandLineParser; +import org.apache.commons.cli.DefaultParser; +import org.apache.commons.cli.HelpFormatter; +import org.apache.commons.cli.Option; +import org.apache.commons.cli.OptionGroup; +import org.apache.commons.cli.Options; +import org.apache.commons.cli.ParseException; + +public class 
Templates { + + // [START dlp_create_inspect_template] + /** + * Create a new DLP inspection configuration template. + * + * @param displayName (Optional) The human-readable name to give the template + * @param projectId Google Cloud Project ID to call the API under + * @param templateId (Optional) The name of the template to be created + * @param infoTypeList The infoTypes of information to match + * @param minLikelihood The minimum likelihood required before returning a match + * @param maxFindings The maximum number of findings to report per request (0 = server maximum) + */ + private static void createInspectTemplate( + String displayName, + String templateId, + String description, + String projectId, + List infoTypeList, + Likelihood minLikelihood, + int maxFindings) { + try (DlpServiceClient dlpServiceClient = DlpServiceClient.create()) { + + FindingLimits findingLimits = + FindingLimits.newBuilder().setMaxFindingsPerRequest(maxFindings).build(); + + // Construct the inspection configuration for the template + InspectConfig inspectConfig = + InspectConfig.newBuilder() + .addAllInfoTypes(infoTypeList) + .setMinLikelihood(minLikelihood) + .setLimits(findingLimits) + .build(); + + InspectTemplate inspectTemplate = + InspectTemplate.newBuilder() + .setInspectConfig(inspectConfig) + .setDisplayName(displayName) + .setDescription(description) + .build(); + + CreateInspectTemplateRequest createInspectTemplateRequest = + CreateInspectTemplateRequest.newBuilder() + .setParent(ProjectName.of(projectId).toString()) + .setInspectTemplate(inspectTemplate) + .setTemplateId(templateId) + .build(); + + InspectTemplate response = + dlpServiceClient.createInspectTemplate(createInspectTemplateRequest); + System.out.printf("Template created: %s", response.getName()); + } catch (Exception e) { + System.out.printf("Error creating template: %s", e.getMessage()); + } + } + // [END dlp_create_inspect_template] + + // [START dlp_list_inspect_templates] + /** + * List DLP inspection 
templates created in a given project + * + * @param projectId Google Cloud Project ID + */ + private static void listInspectTemplates(String projectId) { + + try (DlpServiceClient dlpServiceClient = DlpServiceClient.create()) { + + ListInspectTemplatesRequest request = + ListInspectTemplatesRequest.newBuilder() + .setParent(ProjectName.of(projectId).toString()) + .setPageSize(1) + .build(); + + ListInspectTemplatesPagedResponse response = dlpServiceClient.listInspectTemplates(request); + ListInspectTemplatesPage page = response.getPage(); + ListInspectTemplatesResponse templatesResponse = page.getResponse(); + + for (InspectTemplate template : templatesResponse.getInspectTemplatesList()) { + System.out.printf("Template name: %s\n", template.getName()); + if (template.getDisplayName() != null) { + System.out.printf("\tDisplay name: %s \n", template.getDisplayName()); + System.out.printf("\tCreate time: %s \n", template.getCreateTime()); + System.out.printf("\tUpdate time: %s \n", template.getUpdateTime()); + + // print inspection config + InspectConfig inspectConfig = template.getInspectConfig(); + for (InfoType infoType : inspectConfig.getInfoTypesList()) { + System.out.printf("\tInfoType: %s\n", infoType.getName()); + } + System.out.printf("\tMin likelihood: %s\n", inspectConfig.getMinLikelihood()); + System.out.printf("\tLimits: %s\n", inspectConfig.getLimits().getMaxFindingsPerRequest()); + } + } + } catch (Exception e) { + System.out.printf("Error creating template: %s", e.getMessage()); + } + } + // [END dlp_list_inspect_templates] + + // [START dlp_delete_inspect_template] + /** + * Delete the DLP inspection configuration template with the specified name. 
+ * + * @param projectId Google Cloud Project ID + * @param templateId Template ID to be deleted + */ + private static void deleteInspectTemplate(String projectId, String templateId) { + + // construct the template name to be deleted + String templateName = String.format("projects/%s/inspectTemplates/%s", projectId, templateId); + // instantiate the client + try (DlpServiceClient dlpServiceClient = DlpServiceClient.create()) { + // create delete template request + DeleteInspectTemplateRequest request = + DeleteInspectTemplateRequest.newBuilder().setName(templateName).build(); + + dlpServiceClient.deleteInspectTemplate(request); + System.out.printf("Deleted template: %s\n", templateName); + } catch (Exception e) { + System.err.printf("Error deleting template: %s\n", templateName); + } + } + // [END dlp_delete_inspect_template] + + /** Command line application to create, list and delete DLP inspect templates. */ + public static void main(String[] args) throws Exception { + + OptionGroup optionsGroup = new OptionGroup(); + optionsGroup.setRequired(true); + + Option createOption = new Option("c", "create", false, "Create inspect template"); + optionsGroup.addOption(createOption); + + Option listOption = new Option("l", "list", false, "List inspect templates"); + optionsGroup.addOption(listOption); + + Option deleteOption = new Option("d", "delete", false, "Delete inspect template"); + optionsGroup.addOption(deleteOption); + + Options commandLineOptions = new Options(); + commandLineOptions.addOptionGroup(optionsGroup); + + Option projectIdOption = Option.builder("projectId").hasArg(true).required(false).build(); + commandLineOptions.addOption(projectIdOption); + + Option minLikelihoodOption = + Option.builder("minLikelihood").hasArg(true).required(false).build(); + commandLineOptions.addOption(minLikelihoodOption); + + Option infoTypesOption = Option.builder("infoTypes").hasArg(true).required(false).build(); + infoTypesOption.setArgs(Option.UNLIMITED_VALUES); + 
commandLineOptions.addOption(infoTypesOption); + + Option templateIdOption = Option.builder("templateId").hasArg(true).required(false).build(); + commandLineOptions.addOption(templateIdOption); + + Option templateDescription = Option.builder("description").hasArg(true).required(false).build(); + commandLineOptions.addOption(templateDescription); + + Option templateDisplayNameOption = + Option.builder("displayName").hasArg(true).required(false).build(); + commandLineOptions.addOption(templateDisplayNameOption); + + Option includeQuoteOption = Option.builder("includeQuote").hasArg(true).required(false).build(); + commandLineOptions.addOption(includeQuoteOption); + + Option maxFindingsOption = Option.builder("maxFindings").hasArg(true).required(false).build(); + commandLineOptions.addOption(maxFindingsOption); + + CommandLineParser parser = new DefaultParser(); + HelpFormatter formatter = new HelpFormatter(); + CommandLine cmd; + + try { + cmd = parser.parse(commandLineOptions, args); + } catch (ParseException e) { + System.out.println(e.getMessage()); + formatter.printHelp(Redact.class.getName(), commandLineOptions); + System.exit(1); + return; + } + + String projectId = + cmd.getOptionValue(projectIdOption.getOpt(), ServiceOptions.getDefaultProjectId()); + + if (cmd.hasOption(createOption.getOpt())) { + String templateId = cmd.getOptionValue(templateIdOption.getOpt()); + String displayName = cmd.getOptionValue(templateDisplayNameOption.getOpt()); + String description = cmd.getOptionValue(templateDescription.getOpt()); + + Likelihood minLikelihood = + Likelihood.valueOf( + cmd.getOptionValue( + minLikelihoodOption.getOpt(), Likelihood.LIKELIHOOD_UNSPECIFIED.name())); + + List infoTypesList = new ArrayList<>(); + String[] infoTypes = cmd.getOptionValues(infoTypesOption.getOpt()); + if (infoTypes != null) { + for (String infoType : infoTypes) { + infoTypesList.add(InfoType.newBuilder().setName(infoType).build()); + } + } + int maxFindings = 
Integer.valueOf(cmd.getOptionValue(maxFindingsOption.getOpt(), "0")); + createInspectTemplate( + displayName, + templateId, + description, + projectId, + infoTypesList, + minLikelihood, + maxFindings); + + } else if (cmd.hasOption(listOption.getOpt())) { + listInspectTemplates(projectId); + } else if (cmd.hasOption(deleteOption.getOpt())) { + String templateId = cmd.getOptionValue(templateIdOption.getOpt()); + deleteInspectTemplate(projectId, templateId); + } + } +} diff --git a/dlp/src/main/java/com/example/dlp/Triggers.java b/dlp/src/main/java/com/example/dlp/Triggers.java new file mode 100644 index 00000000000..c6e180d58dc --- /dev/null +++ b/dlp/src/main/java/com/example/dlp/Triggers.java @@ -0,0 +1,299 @@ +/* + * Copyright 2018 Google Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package com.example.dlp; + +import com.google.cloud.ServiceOptions; +import com.google.cloud.dlp.v2.DlpServiceClient; +import com.google.privacy.dlp.v2.CloudStorageOptions; +import com.google.privacy.dlp.v2.CreateJobTriggerRequest; +import com.google.privacy.dlp.v2.DeleteJobTriggerRequest; +import com.google.privacy.dlp.v2.InfoType; +import com.google.privacy.dlp.v2.InspectConfig; +import com.google.privacy.dlp.v2.InspectJobConfig; +import com.google.privacy.dlp.v2.JobTrigger; +import com.google.privacy.dlp.v2.Likelihood; +import com.google.privacy.dlp.v2.ListJobTriggersRequest; +import com.google.privacy.dlp.v2.ProjectJobTriggerName; +import com.google.privacy.dlp.v2.ProjectName; +import com.google.privacy.dlp.v2.Schedule; +import com.google.privacy.dlp.v2.StorageConfig; +import com.google.protobuf.Duration; +import java.util.ArrayList; +import java.util.List; +import org.apache.commons.cli.CommandLine; +import org.apache.commons.cli.CommandLineParser; +import org.apache.commons.cli.DefaultParser; +import org.apache.commons.cli.HelpFormatter; +import org.apache.commons.cli.Option; +import org.apache.commons.cli.OptionGroup; +import org.apache.commons.cli.Options; +import org.apache.commons.cli.ParseException; + +public class Triggers { + + // [START dlp_create_trigger] + /** + * Schedule a DLP inspection trigger for a GCS location. + * + * @param triggerId (Optional) name of the trigger to be created + * @param displayName (Optional) display name for the trigger to be created + * @param description (Optional) description for the trigger to be created + * @param scanPeriod How often to wait between scans, in days (minimum = 1 day) + * @param infoTypes infoTypes of information to match eg. 
InfoType.PHONE_NUMBER, + * InfoType.EMAIL_ADDRESS + * @param minLikelihood minimum likelihood required before returning a match + * @param maxFindings maximum number of findings to report per request (0 = server maximum) + * @param projectId The project ID to run the API call under + */ + private static void createTrigger( + String triggerId, + String displayName, + String description, + String bucketName, + String fileName, + int scanPeriod, + List infoTypes, + Likelihood minLikelihood, + int maxFindings, + String projectId) + throws Exception { + + // instantiate a client + DlpServiceClient dlpServiceClient = DlpServiceClient.create(); + try { + + CloudStorageOptions cloudStorageOptions = + CloudStorageOptions.newBuilder() + .setFileSet( + CloudStorageOptions.FileSet.newBuilder() + .setUrl("gs://" + bucketName + "/" + fileName)) + .build(); + StorageConfig storageConfig = + StorageConfig.newBuilder().setCloudStorageOptions(cloudStorageOptions).build(); + + InspectConfig.FindingLimits findingLimits = + InspectConfig.FindingLimits.newBuilder().setMaxFindingsPerRequest(maxFindings).build(); + + InspectConfig inspectConfig = + InspectConfig.newBuilder() + .addAllInfoTypes(infoTypes) + .setMinLikelihood(minLikelihood) + .setLimits(findingLimits) + .build(); + + InspectJobConfig inspectJobConfig = + InspectJobConfig.newBuilder() + .setInspectConfig(inspectConfig) + .setStorageConfig(storageConfig) + .build(); + + // Schedule scan of GCS bucket every scanPeriod number of days (minimum = 1 day) + Duration duration = Duration.newBuilder().setSeconds(scanPeriod * 24 * 3600).build(); + Schedule schedule = Schedule.newBuilder().setRecurrencePeriodDuration(duration).build(); + JobTrigger.Trigger trigger = JobTrigger.Trigger.newBuilder().setSchedule(schedule).build(); + JobTrigger jobTrigger = + JobTrigger.newBuilder() + .setInspectJob(inspectJobConfig) + .setName(triggerId) + .setDisplayName(displayName) + .setDescription(description) + .setStatus(JobTrigger.Status.HEALTHY) + 
.addTriggers(trigger) + .build(); + + // Create scan request + CreateJobTriggerRequest createJobTriggerRequest = + CreateJobTriggerRequest.newBuilder() + .setParent(ProjectName.of(projectId).toString()) + .setJobTrigger(jobTrigger) + .build(); + + JobTrigger createdJobTrigger = dlpServiceClient.createJobTrigger(createJobTriggerRequest); + + System.out.println("Created Trigger: " + createdJobTrigger.getName()); + } catch (Exception e) { + System.out.println("Error creating trigger: " + e.getMessage()); + } + } + // [END dlp_create_trigger] + + // [START dlp_list_triggers] + /** + * List all DLP triggers for a given project. + * + * @param projectId The project ID to run the API call under. + */ + private static void listTriggers(String projectId) { + // Instantiates a client + try (DlpServiceClient dlpServiceClient = DlpServiceClient.create()) { + ListJobTriggersRequest listJobTriggersRequest = + ListJobTriggersRequest.newBuilder() + .setParent(ProjectName.of(projectId).toString()) + .build(); + DlpServiceClient.ListJobTriggersPagedResponse response = + dlpServiceClient.listJobTriggers(listJobTriggersRequest); + response + .getPage() + .getValues() + .forEach( + trigger -> { + System.out.println("Trigger: " + trigger.getName()); + System.out.println("\tCreated: " + trigger.getCreateTime()); + System.out.println("\tUpdated: " + trigger.getUpdateTime()); + if (trigger.getDisplayName() != null) { + System.out.println("\tDisplay name: " + trigger.getDisplayName()); + } + if (trigger.getDescription() != null) { + System.out.println("\tDescription: " + trigger.getDescription()); + } + System.out.println("\tStatus: " + trigger.getStatus()); + System.out.println("\tError count: " + trigger.getErrorsCount()); + }); + } catch (Exception e) { + System.out.println("Error listing triggers :" + e.getMessage()); + } + } + // [END dlp_list_triggers] + + // [START dlp_delete_trigger] + /** + * Delete a DLP trigger in a project. 
+ * + * @param projectId The project ID to run the API call under. + * @param triggerId Trigger ID + */ + private static void deleteTrigger(String projectId, String triggerId) { + + ProjectJobTriggerName triggerName = ProjectJobTriggerName.of(projectId, triggerId); + try (DlpServiceClient dlpServiceClient = DlpServiceClient.create()) { + DeleteJobTriggerRequest deleteJobTriggerRequest = + DeleteJobTriggerRequest.newBuilder().setName(triggerName.toString()).build(); + dlpServiceClient.deleteJobTrigger(deleteJobTriggerRequest); + + System.out.println("Trigger deleted: " + triggerName.toString()); + } catch (Exception e) { + System.out.println("Error deleting trigger :" + e.getMessage()); + } + } + // [END dlp_delete_trigger] + + /** Command line application to crate, list and delete triggers. */ + public static void main(String[] args) throws Exception { + + OptionGroup optionsGroup = new OptionGroup(); + optionsGroup.setRequired(true); + + Option createTriggerOption = + new Option("c", "create", false, "Create trigger to scan a GCS bucket"); + optionsGroup.addOption(createTriggerOption); + + Option listTriggersOption = new Option("l", "list", false, "List triggers"); + optionsGroup.addOption(listTriggersOption); + + Option deleteTriggerOption = new Option("d", "delete", false, "Delete trigger"); + optionsGroup.addOption(deleteTriggerOption); + + Options commandLineOptions = new Options(); + commandLineOptions.addOptionGroup(optionsGroup); + + Option bucketNameOption = Option.builder("bucketName").hasArg(true).required(false).build(); + commandLineOptions.addOption(bucketNameOption); + + Option gcsFileNameOption = Option.builder("fileName").hasArg(true).required(false).build(); + commandLineOptions.addOption(gcsFileNameOption); + + Option minLikelihoodOption = + Option.builder("minLikelihood").hasArg(true).required(false).build(); + + commandLineOptions.addOption(minLikelihoodOption); + + Option maxFindingsOption = 
Option.builder("maxFindings").hasArg(true).required(false).build(); + + commandLineOptions.addOption(maxFindingsOption); + + Option infoTypesOption = Option.builder("infoTypes").hasArg(true).required(false).build(); + infoTypesOption.setArgs(Option.UNLIMITED_VALUES); + commandLineOptions.addOption(infoTypesOption); + + Option projectIdOption = Option.builder("projectId").hasArg(true).required(false).build(); + commandLineOptions.addOption(projectIdOption); + + Option triggerIdOption = Option.builder("triggerId").hasArg(true).required(false).build(); + commandLineOptions.addOption(triggerIdOption); + Option displayNameOption = Option.builder("displayName").hasArg(true).required(false).build(); + commandLineOptions.addOption(displayNameOption); + Option descriptionOption = Option.builder("description").hasArg(true).required(false).build(); + commandLineOptions.addOption(descriptionOption); + + Option scanPeriodOption = Option.builder("scanPeriod").hasArg(true).required(false).build(); + commandLineOptions.addOption(scanPeriodOption); + + CommandLineParser parser = new DefaultParser(); + HelpFormatter formatter = new HelpFormatter(); + CommandLine cmd; + + try { + cmd = parser.parse(commandLineOptions, args); + } catch (ParseException e) { + System.out.println(e.getMessage()); + formatter.printHelp(DeIdentification.class.getName(), commandLineOptions); + System.exit(1); + return; + } + + String projectId = + cmd.getOptionValue(projectIdOption.getOpt(), ServiceOptions.getDefaultProjectId()); + if (cmd.hasOption("c")) { + Likelihood minLikelihood = + Likelihood.valueOf( + cmd.getOptionValue( + minLikelihoodOption.getOpt(), Likelihood.LIKELIHOOD_UNSPECIFIED.name())); + int maxFindings = Integer.parseInt(cmd.getOptionValue(maxFindingsOption.getOpt(), "0")); + String triggerId = cmd.getOptionValue(triggerIdOption.getOpt()); + String displayName = cmd.getOptionValue(displayNameOption.getOpt(), ""); + String description = cmd.getOptionValue(descriptionOption.getOpt(), ""); + 
String bucketName = cmd.getOptionValue(bucketNameOption.getOpt()); + String fileName = cmd.getOptionValue(gcsFileNameOption.getOpt()); + int scanPeriod = Integer.valueOf(cmd.getOptionValue(scanPeriodOption.getOpt())); + List infoTypesList = new ArrayList<>(); + if (cmd.hasOption(infoTypesOption.getOpt())) { + infoTypesList = new ArrayList<>(); + String[] infoTypes = cmd.getOptionValues(infoTypesOption.getOpt()); + for (String infoType : infoTypes) { + infoTypesList.add(InfoType.newBuilder().setName(infoType).build()); + } + } + createTrigger( + triggerId, + displayName, + description, + bucketName, + fileName, + scanPeriod, + infoTypesList, + minLikelihood, + maxFindings, + projectId); + } else if (cmd.hasOption("l")) { + // list triggers + listTriggers(projectId); + } else if (cmd.hasOption("d")) { + String triggerId = cmd.getOptionValue(triggerIdOption.getOpt()); + deleteTrigger(projectId, triggerId); + } + } +} diff --git a/dlp/src/test/java/com/example/dlp/DeIdentificationIT.java b/dlp/src/test/java/com/example/dlp/DeIdentificationIT.java index d31708c568a..ec796c60bbb 100644 --- a/dlp/src/test/java/com/example/dlp/DeIdentificationIT.java +++ b/dlp/src/test/java/com/example/dlp/DeIdentificationIT.java @@ -16,14 +16,17 @@ package com.example.dlp; -import static org.junit.Assert.assertEquals; +import static org.hamcrest.CoreMatchers.containsString; import static org.junit.Assert.assertFalse; import static org.junit.Assert.assertNotNull; +import static org.junit.Assert.assertThat; import static org.junit.Assert.assertTrue; import java.io.ByteArrayOutputStream; import java.io.PrintStream; -import java.util.regex.Pattern; +import java.nio.file.Files; +import java.nio.file.Paths; +import java.util.Arrays; import org.junit.After; import org.junit.Before; import org.junit.Test; @@ -31,9 +34,10 @@ import org.junit.runners.JUnit4; @RunWith(JUnit4.class) -//CHECKSTYLE OFF: AbbreviationAsWordInName +// CHECKSTYLE OFF: AbbreviationAsWordInName public class 
DeIdentificationIT { - //CHECKSTYLE ON: AbbreviationAsWordInName + + // CHECKSTYLE ON: AbbreviationAsWordInName private ByteArrayOutputStream bout; private PrintStream out; @@ -47,7 +51,7 @@ public class DeIdentificationIT { public void setUp() { bout = new ByteArrayOutputStream(); out = new PrintStream(bout); - System.setOut(out); // TODO(b/64541432) DLP currently doesn't support GOOGLE DEFAULT AUTH + System.setOut(out); assertNotNull(System.getenv("GOOGLE_APPLICATION_CREDENTIALS")); assertNotNull(System.getenv("DLP_DEID_WRAPPED_KEY")); assertNotNull(System.getenv("DLP_DEID_KEY_NAME")); @@ -56,26 +60,88 @@ public void setUp() { @Test public void testDeidStringMasksCharacters() throws Exception { String text = "\"My SSN is 372819127\""; - DeIdentification.main(new String[] { - "-m", text, - "-maskingCharacter", "x", - "-numberToMask", "5" - }); + DeIdentification.main( + new String[] { + "-m", text, + "-maskingCharacter", "x", + "-numberToMask", "5" + }); String output = bout.toString(); - assertEquals(output, "My SSN is xxxxx9127\n"); + assertThat(output, containsString("My SSN is xxxxx9127")); } @Test - public void testDeidStringPerformsFpe() throws Exception { - String text = "\"My SSN is 372819127\""; - DeIdentification.main(new String[] { - "-f", text, - "-wrappedKey", wrappedKey, - "-keyName", keyName - }); + public void testDeidReidFpe() throws Exception { + + // Test DeID + String text = "My SSN is 372819127"; + DeIdentification.main( + new String[] { + "-f", + "\"" + text + "\"", + "-wrappedKey", + wrappedKey, + "-keyName", + keyName, + "-commonAlphabet", + "NUMERIC", + "-surrogateType", + "SSN_TOKEN" + }); + String deidOutput = bout.toString(); + assertFalse("Response contains original SSN.", deidOutput.contains("372819127")); + assertTrue(deidOutput.matches("My SSN is SSN_TOKEN\\(9\\):\\d+\n")); + + // Test ReID + bout.flush(); + DeIdentification.main( + new String[] { + "-r", + deidOutput.toString().trim(), + "-wrappedKey", + wrappedKey, + "-keyName", + 
keyName, + "-commonAlphabet", + "NUMERIC", + "-surrogateType", + "SSN_TOKEN" + }); + String reidOutput = bout.toString(); + assertThat(reidOutput, containsString(text)); + } + + @Test + public void testDeidentifyWithDateShift() throws Exception { + String outputPath = "src/test/resources/results.temp.csv"; + DeIdentification.main( + new String[] { + "-d", + "-inputCsvPath", + "src/test/resources/dates.csv", + "-outputCsvPath", + outputPath, + "-dateFields", + "birth_date,register_date", + "-lowerBoundDays", + "5", + "-upperBoundDays", + "5", + "-contextField", + "name", + "-wrappedKey", + wrappedKey, + "-keyName", + keyName + }); String output = bout.toString(); - assertFalse(output.contains(text)); - assertTrue(Pattern.compile("My SSN is \\w+").matcher(output).find()); + assertThat(output, containsString("Successfully saved date-shift output to: results.temp.csv")); + + // Compare the result against an expected output file + byte[] resultCsv = Files.readAllBytes(Paths.get(outputPath)); + byte[] correctCsv = Files.readAllBytes(Paths.get("src/test/resources/results.correct.csv")); + + assertTrue(Arrays.equals(resultCsv, correctCsv)); } @After diff --git a/dlp/src/test/java/com/example/dlp/InspectIT.java b/dlp/src/test/java/com/example/dlp/InspectIT.java index 5f57d194a8a..60f078dcb26 100644 --- a/dlp/src/test/java/com/example/dlp/InspectIT.java +++ b/dlp/src/test/java/com/example/dlp/InspectIT.java @@ -16,28 +16,30 @@ package com.example.dlp; +import static org.hamcrest.CoreMatchers.containsString; import static org.junit.Assert.assertNotNull; -import static org.junit.Assert.assertTrue; +import static org.junit.Assert.assertThat; import java.io.ByteArrayOutputStream; -import java.io.File; import java.io.PrintStream; import org.junit.After; import org.junit.Before; -import org.junit.Ignore; import org.junit.Test; import org.junit.runner.RunWith; import org.junit.runners.JUnit4; @RunWith(JUnit4.class) -//CHECKSTYLE OFF: AbbreviationAsWordInName +// CHECKSTYLE OFF: 
AbbreviationAsWordInName public class InspectIT { - //CHECKSTYLE ON: AbbreviationAsWordInName + // CHECKSTYLE ON: AbbreviationAsWordInName + private ByteArrayOutputStream bout; private PrintStream out; // Update to Google Cloud Storage path containing test.txt private String bucketName = System.getenv("GOOGLE_CLOUD_PROJECT") + "/dlp"; + private String topicId = "dlp-tests"; + private String subscriptionId = "dlp-test"; // Update to Google Cloud Datastore Kind containing an entity // with phone number and email address properties. @@ -47,7 +49,7 @@ public class InspectIT { public void setUp() { bout = new ByteArrayOutputStream(); out = new PrintStream(bout); - System.setOut(out); // TODO(b/64541432) DLP currently doesn't support GOOGLE DEFAULT AUTH + System.setOut(out); assertNotNull(System.getenv("GOOGLE_APPLICATION_CREDENTIALS")); } @@ -55,61 +57,99 @@ public void setUp() { public void testStringInspectionReturnsInfoTypes() throws Exception { String text = "\"My phone number is (234) 456-7890 and my email address is gary@somedomain.com\""; - Inspect.main(new String[] {"-s", text}); + Inspect.main(new String[] {"-s", text, "-infoTypes", "PHONE_NUMBER", "EMAIL_ADDRESS"}); String output = bout.toString(); - assertTrue(output.contains("PHONE_NUMBER")); - assertTrue(output.contains("EMAIL_ADDRESS")); + + assertThat(output, containsString("PHONE_NUMBER")); + assertThat(output, containsString("EMAIL_ADDRESS")); } @Test public void testTextFileInspectionReturnsInfoTypes() throws Exception { - ClassLoader classLoader = getClass().getClassLoader(); - File file = new File(classLoader.getResource("test.txt").getFile()); - Inspect.main(new String[] {"-f", file.getAbsolutePath()}); + Inspect.main( + new String[] { + "-f", "src/test/resources/test.txt", "-infoTypes", "PHONE_NUMBER", "EMAIL_ADDRESS" + }); String output = bout.toString(); - assertTrue(output.contains("PHONE_NUMBER")); - assertTrue(output.contains("EMAIL_ADDRESS")); + assertThat(output, 
containsString("PHONE_NUMBER")); + assertThat(output, containsString("EMAIL_ADDRESS")); } - @Ignore // TODO: b/69461298 @Test public void testImageFileInspectionReturnsInfoTypes() throws Exception { - ClassLoader classLoader = getClass().getClassLoader(); - File file = new File(classLoader.getResource("test.png").getFile()); - Inspect.main(new String[] {"-f", file.getAbsolutePath()}); + Inspect.main( + new String[] { + "-f", "src/test/resources/test.png", "-infoTypes", "PHONE_NUMBER", "EMAIL_ADDRESS" + }); String output = bout.toString(); - assertTrue(output.contains("PHONE_NUMBER")); - assertTrue(output.contains("EMAIL_ADDRESS")); + assertThat(output, containsString("PHONE_NUMBER")); + assertThat(output, containsString("EMAIL_ADDRESS")); } // Requires that bucket by the specified name exists @Test public void testGcsFileInspectionReturnsInfoTypes() throws Exception { - Inspect.main(new String[] {"-gcs", "-bucketName", bucketName, "-fileName", "test.txt"}); + Inspect.main( + new String[] { + "-gcs", + "-bucketName", + bucketName, + "-topicId", + topicId, + "-subscriptionId", + subscriptionId, + "-fileName", + "test.txt", + "-infoTypes", + "PHONE_NUMBER", + "EMAIL_ADDRESS" + }); String output = bout.toString(); - assertTrue(output.contains("PHONE_NUMBER")); - assertTrue(output.contains("EMAIL_ADDRESS")); + assertThat(output, containsString("PHONE_NUMBER")); + assertThat(output, containsString("EMAIL_ADDRESS")); } // Requires a Datastore kind containing an entity // with phone number and email address properties. 
@Test public void testDatastoreInspectionReturnsInfoTypes() throws Exception { - Inspect.main(new String[] {"-ds", "-kind", datastoreKind}); + Inspect.main( + new String[] { + "-ds", + "-kind", + datastoreKind, + "-topicId", + topicId, + "-subscriptionId", + subscriptionId, + "-infoTypes", + "PHONE_NUMBER", + "EMAIL_ADDRESS" + }); String output = bout.toString(); - assertTrue(output.contains("PHONE_NUMBER")); - assertTrue(output.contains("EMAIL_ADDRESS")); + assertThat(output, containsString("PHONE_NUMBER")); + assertThat(output, containsString("EMAIL_ADDRESS")); } @Test public void testBigqueryInspectionReturnsInfoTypes() throws Exception { - Inspect.main(new String[] { - "-bq", - "-datasetId", "integration_tests_dlp", - "-tableId", "harmful" - }); + Inspect.main( + new String[] { + "-bq", + "-datasetId", + "integration_tests_dlp", + "-topicId", + topicId, + "-subscriptionId", + subscriptionId, + "-tableId", + "harmful", + "-infoTypes", + "PHONE_NUMBER", + "EMAIL_ADDRESS" + }); String output = bout.toString(); - assertTrue(output.contains("PHONE_NUMBER")); + assertThat(output, containsString("PHONE_NUMBER")); } @After diff --git a/dlp/src/test/java/com/example/dlp/JobsIT.java b/dlp/src/test/java/com/example/dlp/JobsIT.java new file mode 100644 index 00000000000..73ecff9d8f4 --- /dev/null +++ b/dlp/src/test/java/com/example/dlp/JobsIT.java @@ -0,0 +1,85 @@ +/* + * Copyright 2018 Google Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package com.example.dlp; + +import static org.hamcrest.CoreMatchers.containsString; +import static org.junit.Assert.assertNotNull; +import static org.junit.Assert.assertThat; +import static org.junit.Assert.assertTrue; + +import java.io.ByteArrayOutputStream; +import java.io.PrintStream; +import java.util.regex.Matcher; +import java.util.regex.Pattern; +import org.junit.After; +import org.junit.Before; +import org.junit.Test; +import org.junit.runner.RunWith; +import org.junit.runners.JUnit4; + +// CHECKSTYLE OFF: AbbreviationAsWordInName +@RunWith(JUnit4.class) +public class JobsIT { + // CHECKSTYLE ON: AbbreviationAsWordInName + + private ByteArrayOutputStream bout; + private PrintStream out; + + private static final Pattern jobIdPattern = Pattern.compile("projects/.*/dlpJobs/i-\\d+"); + + // Update to Google Cloud Storage path containing test.txt + private String bucketName = System.getenv("GOOGLE_CLOUD_PROJECT") + "/dlp"; + + @Before + public void setUp() { + bout = new ByteArrayOutputStream(); + out = new PrintStream(bout); + System.setOut(out); + assertNotNull(System.getenv("GOOGLE_APPLICATION_CREDENTIALS")); + } + + @After + public void tearDown() { + System.setOut(null); + bout.reset(); + } + + @Test + public void testListJobs() throws Exception { + Jobs.main(new String[] {"-l", "-filter", "state=DONE"}); + String output = bout.toString(); + Matcher matcher = jobIdPattern.matcher(bout.toString()); + assertTrue("List must contain results.", matcher.find()); + } + + @Test + public void testDeleteJobs() throws Exception { + // Get a list of JobIds, and extract one to delete + Jobs.main(new String[] {"-l", "-filter", "state=DONE"}); + String jobList = bout.toString(); + Matcher matcher = jobIdPattern.matcher(jobList); + assertTrue("List must contain results.", matcher.find()); + // Extract just the ID + String jobId = matcher.group(0).split("/")[3]; + bout.reset(); + + // Delete the Job + Jobs.main(new String[] {"-d", "-jobId", jobId}); + String output 
= bout.toString(); + assertThat(output, containsString("Job deleted successfully.")); + } +} diff --git a/dlp/src/test/java/com/example/dlp/MetadataIT.java b/dlp/src/test/java/com/example/dlp/MetadataIT.java index 25ba2b68f5d..a4b968221c0 100644 --- a/dlp/src/test/java/com/example/dlp/MetadataIT.java +++ b/dlp/src/test/java/com/example/dlp/MetadataIT.java @@ -28,9 +28,9 @@ import org.junit.runners.JUnit4; @RunWith(JUnit4.class) -//CHECKSTYLE OFF: AbbreviationAsWordInName +// CHECKSTYLE OFF: AbbreviationAsWordInName public class MetadataIT { - //CHECKSTYLE ON: AbbreviationAsWordInName + // CHECKSTYLE ON: AbbreviationAsWordInName private ByteArrayOutputStream bout; private PrintStream out; @@ -43,23 +43,20 @@ public void setUp() { assertNotNull(System.getenv("GOOGLE_APPLICATION_CREDENTIALS")); } - @Test - public void testRootCategoriesAreRetrieved() throws Exception { - Metadata.main(new String[] {}); - String output = bout.toString(); - assertTrue(output.contains("GOVERNMENT")); - assertTrue(output.contains("HEALTH")); + @After + public void tearDown() { + System.setOut(null); + bout.reset(); } @Test - public void testInfoTypesAreRetrieved() throws Exception { - Metadata.main(new String[] {"-category", "GOVERNMENT"}); + public void testListInfoTypes() throws Exception { + Metadata.main( + new String[] { + "-language", "en-US", + "-filter", "supported_by=INSPECT" + }); String output = bout.toString(); assertTrue(output.contains("Name") && output.contains("Display name")); } - - @After - public void tearDown() { - System.setOut(null); - } } diff --git a/dlp/src/test/java/com/example/dlp/QuickStartIT.java b/dlp/src/test/java/com/example/dlp/QuickStartIT.java index 1fa9d7b36d6..d62726ea2db 100644 --- a/dlp/src/test/java/com/example/dlp/QuickStartIT.java +++ b/dlp/src/test/java/com/example/dlp/QuickStartIT.java @@ -16,8 +16,9 @@ package com.example.dlp; +import static org.hamcrest.CoreMatchers.containsString; import static org.junit.Assert.assertNotNull; -import static 
org.junit.Assert.assertTrue; +import static org.junit.Assert.assertThat; import java.io.ByteArrayOutputStream; import java.io.PrintStream; @@ -28,9 +29,10 @@ import org.junit.runners.JUnit4; @RunWith(JUnit4.class) -//CHECKSTYLE OFF: AbbreviationAsWordInName +// CHECKSTYLE OFF: AbbreviationAsWordInName public class QuickStartIT { - //CHECKSTYLE ON: AbbreviationAsWordInName + + // CHECKSTYLE ON: AbbreviationAsWordInName private ByteArrayOutputStream bout; private PrintStream out; @@ -46,7 +48,8 @@ public void setUp() { public void testQuickStart() throws Exception { QuickStart.main(new String[] {}); String output = bout.toString(); - assertTrue(output.contains("US_MALE_NAME")); + + assertThat(output, containsString("PERSON_NAME")); } @After diff --git a/dlp/src/test/java/com/example/dlp/RedactIT.java b/dlp/src/test/java/com/example/dlp/RedactIT.java index 798c11de258..5708fae6211 100644 --- a/dlp/src/test/java/com/example/dlp/RedactIT.java +++ b/dlp/src/test/java/com/example/dlp/RedactIT.java @@ -16,24 +16,24 @@ package com.example.dlp; -import static junit.framework.TestCase.assertFalse; +import static org.hamcrest.CoreMatchers.containsString; +import static org.hamcrest.CoreMatchers.not; import static org.junit.Assert.assertNotNull; -import static org.junit.Assert.assertTrue; +import static org.junit.Assert.assertThat; import java.io.ByteArrayOutputStream; -import java.io.File; import java.io.PrintStream; import org.junit.After; import org.junit.Before; -import org.junit.Ignore; import org.junit.Test; import org.junit.runner.RunWith; import org.junit.runners.JUnit4; @RunWith(JUnit4.class) -//CHECKSTYLE OFF: AbbreviationAsWordInName +// CHECKSTYLE OFF: AbbreviationAsWordInName public class RedactIT { - //CHECKSTYLE ON: AbbreviationAsWordInName + + // CHECKSTYLE ON: AbbreviationAsWordInName private ByteArrayOutputStream bout; private PrintStream out; @@ -46,36 +46,25 @@ public void setUp() { } @Test - public void testInfoTypesInStringAreReplaced() throws Exception { 
- String text = - "\"My phone number is (234) 456-7890 and my email address is gary@somedomain.com\""; - Redact.main(new String[] {"-s", text, "-r", "_REDACTED_"}); - String output = bout.toString(); - assertTrue(output.contains("My phone number is _REDACTED_ and my email address is _REDACTED_")); - } - - @Ignore // TODO: b/69461298 - @Test - public void testInfoTypesInImageAreReplaced() throws Exception { - ClassLoader classLoader = getClass().getClassLoader(); - // confirm that current data contains info types - File file = new File(classLoader.getResource("test.png").getFile()); - Inspect.main(new String[] {"-f", file.getAbsolutePath()}); - String output = bout.toString(); - assertTrue(output.contains("PHONE_NUMBER")); - assertTrue(output.contains("EMAIL_ADDRESS")); - bout.reset(); - - String outputFilePath = "output.png"; + public void testRedactImage() throws Exception { + // InspectIT Tests verify original has PII present + String outputFilePath = "src/test/resources/output.png"; + // Restrict phone number, but not email Redact.main( new String[] { - "-f", file.getAbsolutePath(), "-infoTypes", "PHONE_NUMBER", "-o", outputFilePath + "-f", "src/test/resources/test.png", + "-infoTypes", "PHONE_NUMBER", + "-o", outputFilePath }); - Inspect.main(new String[] {"-f", outputFilePath}); - output = bout.toString(); - assertFalse(output.contains("PHONE_NUMBER")); - assertTrue(output.contains("EMAIL_ADDRESS")); + bout.reset(); + + // Verify that phone_number is missing but email is present + Inspect.main( + new String[] {"-f", outputFilePath, "-infoTypes", "PHONE_NUMBER", "EMAIL_ADDRESS"}); + String output = bout.toString(); + assertThat(output, not(containsString("PHONE_NUMBER"))); + assertThat(output, containsString("EMAIL_ADDRESS")); } @After diff --git a/dlp/src/test/java/com/example/dlp/RiskAnalysisIT.java b/dlp/src/test/java/com/example/dlp/RiskAnalysisIT.java index 58a1bbb22a2..0dc18a87b47 100644 --- a/dlp/src/test/java/com/example/dlp/RiskAnalysisIT.java +++ 
b/dlp/src/test/java/com/example/dlp/RiskAnalysisIT.java @@ -16,7 +16,9 @@ package com.example.dlp; +import static org.hamcrest.CoreMatchers.containsString; import static org.junit.Assert.assertNotNull; +import static org.junit.Assert.assertThat; import static org.junit.Assert.assertTrue; import java.io.ByteArrayOutputStream; @@ -29,17 +31,21 @@ import org.junit.runners.JUnit4; @RunWith(JUnit4.class) -//CHECKSTYLE OFF: AbbreviationAsWordInName +// CHECKSTYLE OFF: AbbreviationAsWordInName public class RiskAnalysisIT { - //CHECKSTYLE ON: AbbreviationAsWordInName + + // CHECKSTYLE ON: AbbreviationAsWordInName private ByteArrayOutputStream bout; private PrintStream out; + private String topicId = "dlp-tests"; + private String subscriptionId = "dlp-test"; + @Before public void setUp() { bout = new ByteArrayOutputStream(); out = new PrintStream(bout); - System.setOut(out); // TODO(b/64541432) DLP currently doesn't support GOOGLE DEFAULT AUTH + System.setOut(out); assertNotNull(System.getenv("GOOGLE_APPLICATION_CREDENTIALS")); assertNotNull(System.getenv("DLP_DEID_WRAPPED_KEY")); assertNotNull(System.getenv("DLP_DEID_KEY_NAME")); @@ -47,40 +53,63 @@ public void setUp() { @Test public void testNumericalStats() throws Exception { - RiskAnalysis.main(new String[] { - "-n", - "-datasetId", "integration_tests_dlp", - "-tableId", "harmful", - "-columnName", "Age" - }); + RiskAnalysis.main( + new String[] { + "-n", + "-datasetId", + "integration_tests_dlp", + "-tableId", + "harmful", + "-columnName", + "Age", + "-topicId", + topicId, + "-subscriptionId", + subscriptionId + }); String output = bout.toString(); - assertTrue(Pattern.compile( - "Value at 0% quantile: integer_value: \\d{2}").matcher(output).find()); - assertTrue(Pattern.compile( - "Value at \\d{2}% quantile: integer_value: \\d{2}").matcher(output).find()); + assertThat(output, containsString("Value at ")); } @Test public void testCategoricalStats() throws Exception { - RiskAnalysis.main(new String[] { - "-c", - 
"-datasetId", "integration_tests_dlp", - "-tableId", "harmful", - "-columnName", "Mystery" - }); + RiskAnalysis.main( + new String[] { + "-c", + "-datasetId", + "integration_tests_dlp", + "-tableId", + "harmful", + "-columnName", + "Mystery", + "-topicId", + topicId, + "-subscriptionId", + subscriptionId + }); String output = bout.toString(); - assertTrue(Pattern.compile( - "Most common value occurs \\d time\\(s\\)").matcher(output).find()); + + assertTrue(Pattern.compile("Most common value occurs \\d time").matcher(output).find()); + assertTrue(Pattern.compile("Least common value occurs \\d time").matcher(output).find()); } @Test public void testKAnonymity() throws Exception { - RiskAnalysis.main(new String[] { - "-k", - "-datasetId", "integration_tests_dlp", - "-tableId", "harmful", - "-quasiIdColumnNames", "Age", "Mystery" - }); + RiskAnalysis.main( + new String[] { + "-a", + "-datasetId", + "integration_tests_dlp", + "-tableId", + "harmful", + "-quasiIdColumnNames", + "Age", + "Mystery", + "-topicId", + topicId, + "-subscriptionId", + subscriptionId + }); String output = bout.toString(); assertTrue(Pattern.compile("Bucket size range: \\[\\d, \\d\\]").matcher(output).find()); assertTrue(output.contains("Quasi-ID values: integer_value: 19")); @@ -89,19 +118,58 @@ public void testKAnonymity() throws Exception { @Test public void testLDiversity() throws Exception { - RiskAnalysis.main(new String[] { - "-l", - "-datasetId", "integration_tests_dlp", - "-tableId", "harmful", - "-sensitiveAttribute", "Name", - "-quasiIdColumnNames", "Age", "Mystery" - }); + RiskAnalysis.main( + new String[] { + "-l", + "-datasetId", + "integration_tests_dlp", + "-tableId", + "harmful", + "-sensitiveAttribute", + "Name", + "-quasiIdColumnNames", + "Age", + "Mystery", + "-topicId", + topicId, + "-subscriptionId", + subscriptionId + }); String output = bout.toString(); assertTrue(output.contains("Quasi-ID values: integer_value: 19")); assertTrue(output.contains("Class size: 1")); 
assertTrue(output.contains("Sensitive value string_value: \"James\"")); } + @Test + public void testKMap() throws Exception { + RiskAnalysis.main( + new String[] { + "-m", + "-datasetId", + "integration_tests_dlp", + "-tableId", + "harmful", + "-topicId", + topicId, + "-subscriptionId", + subscriptionId, + "-regionCode", + "US", + "-quasiIdColumnNames", + "Age", + "Gender", + "-infoTypes", + "AGE", + "GENDER" + }); + String output = bout.toString(); + + assertTrue(Pattern.compile("Anonymity range: \\[\\d, \\d]").matcher(output).find()); + assertTrue(Pattern.compile("Size: \\d").matcher(output).find()); + assertTrue(Pattern.compile("Values: \\{\\d{2}, \"Female\"\\}").matcher(output).find()); + } + @After public void tearDown() { System.setOut(null); diff --git a/dlp/src/test/java/com/example/dlp/TemplatesIT.java b/dlp/src/test/java/com/example/dlp/TemplatesIT.java new file mode 100644 index 00000000000..11c3525d12d --- /dev/null +++ b/dlp/src/test/java/com/example/dlp/TemplatesIT.java @@ -0,0 +1,93 @@ +/* + * Copyright 2018 Google Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package com.example.dlp; + +import static org.hamcrest.CoreMatchers.containsString; +import static org.junit.Assert.assertNotNull; +import static org.junit.Assert.assertThat; +import static org.junit.Assert.assertTrue; + +import java.io.ByteArrayOutputStream; +import java.io.PrintStream; +import java.util.UUID; +import java.util.regex.Matcher; +import java.util.regex.Pattern; +import org.junit.After; +import org.junit.Before; +import org.junit.Test; +import org.junit.runner.RunWith; +import org.junit.runners.JUnit4; + +@RunWith(JUnit4.class) +// CHECKSTYLE OFF: AbbreviationAsWordInName +public class TemplatesIT { + // CHECKSTYLE ON: AbbreviationAsWordInName + + private ByteArrayOutputStream bout; + private PrintStream out; + + @Before + public void setUp() { + bout = new ByteArrayOutputStream(); + out = new PrintStream(bout); + System.setOut(out); + assertNotNull(System.getenv("GOOGLE_APPLICATION_CREDENTIALS")); + } + + @After + public void tearDown() { + System.setOut(null); + bout.reset(); + } + + @Test + public void testCreateInspectTemplate() throws Exception { + Templates.main( + new String[] { + "-c", + "-displayName", + String.format("test-name-%s", UUID.randomUUID()), + "-templateId", + String.format("template%s", UUID.randomUUID()), + "-description", + String.format("description-%s", UUID.randomUUID()) + }); + String output = bout.toString(); + assertThat(output, containsString("Template created: ")); + } + + @Test + public void testListInspectemplate() throws Exception { + Templates.main(new String[] {"-l"}); + String output = bout.toString(); + assertThat(output, containsString("Template name:")); + } + + @Test + public void testDeleteInspectTemplate() throws Exception { + // Extract a Template ID + Templates.main(new String[] {"-l"}); + String output = bout.toString(); + Matcher templateIds = Pattern.compile("template(\\w|\\-)+").matcher(output); + assertTrue(templateIds.find()); + String templateId = templateIds.group(0); + bout.reset(); + 
Templates.main(new String[] {"-d", "-templateId", templateId}); + output = bout.toString(); + assertThat(output, containsString("Deleted template:")); + } +} diff --git a/dlp/src/test/java/com/example/dlp/TriggersIT.java b/dlp/src/test/java/com/example/dlp/TriggersIT.java new file mode 100644 index 00000000000..4a9b07c4063 --- /dev/null +++ b/dlp/src/test/java/com/example/dlp/TriggersIT.java @@ -0,0 +1,106 @@ +/* + * Copyright 2018 Google Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package com.example.dlp; + +import static org.hamcrest.CoreMatchers.containsString; +import static org.junit.Assert.assertNotNull; +import static org.junit.Assert.assertThat; +import static org.junit.Assert.assertTrue; + +import java.io.ByteArrayOutputStream; +import java.io.PrintStream; +import java.util.UUID; +import java.util.regex.Matcher; +import java.util.regex.Pattern; +import org.junit.After; +import org.junit.Before; +import org.junit.Test; +import org.junit.runner.RunWith; +import org.junit.runners.JUnit4; + +@RunWith(JUnit4.class) +// CHECKSTYLE OFF: AbbreviationAsWordInName +public class TriggersIT { + + //CHECKSTYLE ON: AbbreviationAsWordInName + + private ByteArrayOutputStream bout; + private PrintStream out; + + private String bucketName = System.getenv("GOOGLE_CLOUD_PROJECT") + "/dlp"; + private String topicId = "dlp-tests"; + private String subscriptionId = "dlp-test"; + + @Before + public void setUp() { + bout = new ByteArrayOutputStream(); + out = new PrintStream(bout); + System.setOut(out); + assertNotNull(System.getenv("GOOGLE_APPLICATION_CREDENTIALS")); + } + + @After + public void tearDown() { + System.setOut(null); + bout.reset(); + } + + @Test + public void testCreateTrigger() throws Exception { + Triggers.main( + new String[] { + "-c", + "-displayName", + String.format("trigger-name-%s", UUID.randomUUID()), + "-triggerId", + String.format("trigger%s", UUID.randomUUID()), + "-description", + String.format("description-%s", UUID.randomUUID()), + "-bucketName", + bucketName, + "-fileName", + "test.txt", + "-scanPeriod", + "1" + }); + String output = bout.toString(); + assertThat(output, containsString("Created Trigger:")); + } + + @Test + public void testListTrigger() throws Exception { + Triggers.main(new String[] {"-l"}); + String output = bout.toString(); + assertThat(output, containsString("Trigger:")); + } + + @Test + public void testDeleteTrigger() throws Exception { + Triggers.main(new String[] {"-l"}); + String output = 
bout.toString(); + Matcher templateIds = Pattern.compile("(?<=jobTriggers/)[0-9]+").matcher(output); + assertTrue(templateIds.find()); + String triggerId = templateIds.group(0); + bout.reset(); + Triggers.main( + new String[] { + "-d", "-triggerId", triggerId, + }); + output = bout.toString(); + assertThat(output, containsString("Trigger deleted:")); + } +} diff --git a/dlp/src/test/resources/dates.csv b/dlp/src/test/resources/dates.csv new file mode 100644 index 00000000000..290a85dec68 --- /dev/null +++ b/dlp/src/test/resources/dates.csv @@ -0,0 +1,5 @@ +name,birth_date,credit_card,register_date +Ann,01/01/1970,4532908762519852,07/21/1996 +James,03/06/1988,4301261899725540,04/09/2001 +Dan,08/14/1945,4620761856015295,11/15/2011 +Laura,11/03/1992,4564981067258901,01/04/2017 diff --git a/dlp/src/test/resources/results.correct.csv b/dlp/src/test/resources/results.correct.csv new file mode 100644 index 00000000000..5b078fe825a --- /dev/null +++ b/dlp/src/test/resources/results.correct.csv @@ -0,0 +1,5 @@ +name,birth_date,credit_card,register_date +Ann,1970-01-06,4532908762519852,1996-07-26 +James,1988-03-11,4301261899725540,2001-04-14 +Dan,1945-08-19,4620761856015295,2011-11-20 +Laura,1992-11-08,4564981067258901,2017-01-09 From ebdc07f4d93844ac97991047e2ef0692be181b94 Mon Sep 17 00:00:00 2001 From: Tim Swast Date: Wed, 21 Mar 2018 13:18:53 -0700 Subject: [PATCH 3/4] bigquery_datatransfer -> bigquerydatatransfer Update region tags because we are treating BigQuery Data Transfer Service as its own product in the samples tracker. 
--- .../com/example/bigquerydatatransfer/QuickstartSample.java | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/bigquery/datatransfer/cloud-client/src/main/java/com/example/bigquerydatatransfer/QuickstartSample.java b/bigquery/datatransfer/cloud-client/src/main/java/com/example/bigquerydatatransfer/QuickstartSample.java index 4091dd7d20e..943b978edc9 100644 --- a/bigquery/datatransfer/cloud-client/src/main/java/com/example/bigquerydatatransfer/QuickstartSample.java +++ b/bigquery/datatransfer/cloud-client/src/main/java/com/example/bigquerydatatransfer/QuickstartSample.java @@ -16,7 +16,7 @@ package com.example.bigquerydatatransfer; -// [START bigquery_datatransfer_quickstart] +// [START bigquerydatatransfer_quickstart] // Imports the Google Cloud client library import com.google.cloud.bigquery.datatransfer.v1.DataSource; @@ -56,4 +56,4 @@ public static void main(String... args) throws Exception { } } } -// [END bigquery_datatransfer_quickstart] +// [END bigquerydatatransfer_quickstart] From df10b39769f866f71dee89a6e5ed07ec2c1895b9 Mon Sep 17 00:00:00 2001 From: Mairbek Khadikov Date: Wed, 21 Mar 2018 13:29:02 -0700 Subject: [PATCH 4/4] Updated Spanner Dataflow connector samples (#1059) * Updated Spanner Dataflow connector samples * Make checkstyle happy * First test * Updated the test * Added tests * Makes checkstyle happy * Use system properties * Add add system property * New lines --- dataflow/spanner-io/pom.xml | 17 +- .../com/example/dataflow/EstimateSize.java | 86 +++++++++ .../example/dataflow/SpannerGroupWrite.java | 115 ++++++++++++ .../com/example/dataflow/SpannerRead.java | 66 ++----- .../com/example/dataflow/SpannerReadAll.java | 90 +++++++++ .../com/example/dataflow/SpannerWrite.java | 37 ++-- .../example/dataflow/TransactionalRead.java | 146 +++++++++++++++ .../example/dataflow/SpannerGroupWriteIT.java | 149 +++++++++++++++ .../com/example/dataflow/SpannerReadIT.java | 177 ++++++++++++++++++ 
.../com/example/dataflow/SpannerWriteIT.java | 135 +++++++++++++ 10 files changed, 931 insertions(+), 87 deletions(-) create mode 100644 dataflow/spanner-io/src/main/java/com/example/dataflow/EstimateSize.java create mode 100644 dataflow/spanner-io/src/main/java/com/example/dataflow/SpannerGroupWrite.java create mode 100644 dataflow/spanner-io/src/main/java/com/example/dataflow/SpannerReadAll.java create mode 100644 dataflow/spanner-io/src/main/java/com/example/dataflow/TransactionalRead.java create mode 100644 dataflow/spanner-io/src/test/java/com/example/dataflow/SpannerGroupWriteIT.java create mode 100644 dataflow/spanner-io/src/test/java/com/example/dataflow/SpannerReadIT.java create mode 100644 dataflow/spanner-io/src/test/java/com/example/dataflow/SpannerWriteIT.java diff --git a/dataflow/spanner-io/pom.xml b/dataflow/spanner-io/pom.xml index 65700a9645f..165efbffa65 100644 --- a/dataflow/spanner-io/pom.xml +++ b/dataflow/spanner-io/pom.xml @@ -48,6 +48,16 @@ maven-compiler-plugin 3.7.0 + + org.apache.maven.plugins + maven-failsafe-plugin + 2.19.1 + + + default-instance + + + @@ -84,13 +94,6 @@ ${apache_beam.version} - - - com.google.cloud - google-cloud-spanner - 0.34.0-beta - - org.slf4j diff --git a/dataflow/spanner-io/src/main/java/com/example/dataflow/EstimateSize.java b/dataflow/spanner-io/src/main/java/com/example/dataflow/EstimateSize.java new file mode 100644 index 00000000000..5f393d3ed38 --- /dev/null +++ b/dataflow/spanner-io/src/main/java/com/example/dataflow/EstimateSize.java @@ -0,0 +1,86 @@ +/* + * Copyright 2018 Google Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package com.example.dataflow; + +import com.google.cloud.spanner.Struct; +import org.apache.beam.sdk.transforms.DoFn; +import org.apache.beam.sdk.transforms.PTransform; +import org.apache.beam.sdk.transforms.ParDo; +import org.apache.beam.sdk.values.PCollection; + +/** + * Estimates the size of the {@code Struct}. + */ +public class EstimateSize extends PTransform, PCollection> { + + public static EstimateSize create() { + return new EstimateSize(); + } + + private EstimateSize() { + } + + @Override + public PCollection expand(PCollection input) { + return input.apply(ParDo.of(new EstimateStructSizeFn())); + } + + /** + * Estimates the size of a Spanner row. For simplicity, arrays and structs aren't supported. 
+ */ + public static class EstimateStructSizeFn extends DoFn { + + @ProcessElement + public void processElement(ProcessContext c) throws Exception { + Struct row = c.element(); + long sum = 0; + for (int i = 0; i < row.getColumnCount(); i++) { + if (row.isNull(i)) { + continue; + } + + switch (row.getColumnType(i).getCode()) { + case BOOL: + sum += 1; + break; + case INT64: + case FLOAT64: + sum += 8; + break; + case TIMESTAMP: + case DATE: + sum += 12; + break; + case BYTES: + sum += row.getBytes(i).length(); + break; + case STRING: + sum += row.getString(i).length(); + break; + case ARRAY: + throw new IllegalArgumentException("Arrays are not supported :("); + case STRUCT: + throw new IllegalArgumentException("Structs are not supported :("); + default: + throw new IllegalArgumentException("Unsupported type :("); + } + } + c.output(sum); + } + } + +} diff --git a/dataflow/spanner-io/src/main/java/com/example/dataflow/SpannerGroupWrite.java b/dataflow/spanner-io/src/main/java/com/example/dataflow/SpannerGroupWrite.java new file mode 100644 index 00000000000..e1db90f1821 --- /dev/null +++ b/dataflow/spanner-io/src/main/java/com/example/dataflow/SpannerGroupWrite.java @@ -0,0 +1,115 @@ +/* + * Copyright 2018 Google Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package com.example.dataflow; + +import com.google.cloud.Timestamp; +import com.google.cloud.spanner.Mutation; +import com.google.common.base.Charsets; +import com.google.common.hash.Hashing; +import org.apache.beam.sdk.Pipeline; +import org.apache.beam.sdk.io.TextIO; +import org.apache.beam.sdk.io.gcp.spanner.MutationGroup; +import org.apache.beam.sdk.io.gcp.spanner.SpannerIO; +import org.apache.beam.sdk.options.Description; +import org.apache.beam.sdk.options.PipelineOptions; +import org.apache.beam.sdk.options.PipelineOptionsFactory; +import org.apache.beam.sdk.options.Validation; +import org.apache.beam.sdk.transforms.MapElements; +import org.apache.beam.sdk.transforms.SimpleFunction; +import org.apache.beam.sdk.values.PCollection; + +/** + * This sample demonstrates how to group together mutations when writing to the Cloud Spanner + * database. + */ +public class SpannerGroupWrite { + public interface Options extends PipelineOptions { + + @Description("Spanner instance ID to write to") + @Validation.Required + String getInstanceId(); + + void setInstanceId(String value); + + @Description("Spanner database name to write to") + @Validation.Required + String getDatabaseId(); + + void setDatabaseId(String value); + + @Description("Singers output filename in the format: singer_id\tfirst_name\tlast_name") + @Validation.Required + String getSuspiciousUsersFile(); + + void setSuspiciousUsersFile(String value); + + } + + public static void main(String[] args) { + Options options = PipelineOptionsFactory.fromArgs(args).withValidation().as(Options.class); + Pipeline p = Pipeline.create(options); + + String instanceId = options.getInstanceId(); + String databaseId = options.getDatabaseId(); + + String usersIdFile = options.getSuspiciousUsersFile(); + + PCollection suspiciousUserIds = p.apply(TextIO.read().from(usersIdFile)); + + final Timestamp timestamp = Timestamp.now(); + + // [START spanner_dataflow_writegroup] + PCollection mutations = suspiciousUserIds + 
.apply(MapElements.via(new SimpleFunction() { + + @Override + public MutationGroup apply(String userId) { + // Immediately block the user. + Mutation userMutation = Mutation.newUpdateBuilder("Users") + .set("id").to(userId) + .set("state").to("BLOCKED") + .build(); + long generatedId = Hashing.sha1().newHasher() + .putString(userId, Charsets.UTF_8) + .putLong(timestamp.getSeconds()) + .putLong(timestamp.getNanos()) + .hash() + .asLong(); + + // Add an entry to pending review requests. + Mutation pendingReview = Mutation.newInsertOrUpdateBuilder("PendingReviews") + .set("id").to(generatedId) // Must be deterministically generated. + .set("userId").to(userId) + .set("action").to("REVIEW ACCOUNT") + .set("note").to("Suspicious activity detected.") + .build(); + + return MutationGroup.create(userMutation, pendingReview); + } + })); + + mutations.apply(SpannerIO.write() + .withInstanceId(instanceId) + .withDatabaseId(databaseId) + .grouped()); + // [END spanner_dataflow_writegroup] + + p.run().waitUntilFinish(); + + } + +} diff --git a/dataflow/spanner-io/src/main/java/com/example/dataflow/SpannerRead.java b/dataflow/spanner-io/src/main/java/com/example/dataflow/SpannerRead.java index 330c560cb92..439549f245f 100644 --- a/dataflow/spanner-io/src/main/java/com/example/dataflow/SpannerRead.java +++ b/dataflow/spanner-io/src/main/java/com/example/dataflow/SpannerRead.java @@ -24,10 +24,6 @@ import org.apache.beam.sdk.options.PipelineOptions; import org.apache.beam.sdk.options.PipelineOptionsFactory; import org.apache.beam.sdk.options.Validation; -import org.apache.beam.sdk.transforms.Count; -import org.apache.beam.sdk.transforms.DoFn; -import org.apache.beam.sdk.transforms.PTransform; -import org.apache.beam.sdk.transforms.ParDo; import org.apache.beam.sdk.transforms.Sum; import org.apache.beam.sdk.transforms.ToString; import org.apache.beam.sdk.values.PCollection; @@ -87,49 +83,6 @@ public interface Options extends PipelineOptions { void setOutput(String value); } - /** - 
* Estimates the size of a Spanner row. For simplicity, arrays and structs aren't supported. - */ - public static class EstimateStructSizeFn extends DoFn { - - @ProcessElement - public void processElement(ProcessContext c) throws Exception { - Struct row = c.element(); - long sum = 0; - for (int i = 0; i < row.getColumnCount(); i++) { - if (row.isNull(i)) { - continue; - } - - switch (row.getColumnType(i).getCode()) { - case BOOL: - sum += 1; - break; - case INT64: - case FLOAT64: - sum += 8; - break; - case TIMESTAMP: - case DATE: - sum += 12; - break; - case BYTES: - sum += row.getBytes(i).length(); - break; - case STRING: - sum += row.getString(i).length(); - break; - case ARRAY: - throw new IllegalArgumentException("Arrays are not supported :("); - case STRUCT: - throw new IllegalArgumentException("Structs are not supported :("); - default: - throw new IllegalArgumentException("Unsupported type :("); - } - } - c.output(sum); - } - } public static void main(String[] args) { Options options = PipelineOptionsFactory.fromArgs(args).withValidation().as(Options.class); @@ -137,23 +90,26 @@ public static void main(String[] args) { String instanceId = options.getInstanceId(); String databaseId = options.getDatabaseId(); - String query = "SELECT * FROM " + options.getTable(); - - PCollection tableEstimatedSize = p - // Query for all the columns and rows in the specified Spanner table - .apply(SpannerIO.read() + // [START spanner_dataflow_read] + // Query for all the columns and rows in the specified Spanner table + PCollection records = p.apply( + SpannerIO.read() .withInstanceId(instanceId) .withDatabaseId(databaseId) - .withQuery(query)) + .withQuery("SELECT * FROM " + options.getTable())); + // [END spanner_dataflow_read] + + + PCollection tableEstimatedSize = records // Estimate the size of every row - .apply(ParDo.of(new EstimateStructSizeFn())) + .apply(EstimateSize.create()) // Sum all the row sizes to get the total estimated size of the table
.apply(Sum.longsGlobally()); // Write the total size to a file tableEstimatedSize .apply(ToString.elements()) - .apply(TextIO.write().to(options.getOutput())); + .apply(TextIO.write().to(options.getOutput()).withoutSharding()); p.run().waitUntilFinish(); } diff --git a/dataflow/spanner-io/src/main/java/com/example/dataflow/SpannerReadAll.java b/dataflow/spanner-io/src/main/java/com/example/dataflow/SpannerReadAll.java new file mode 100644 index 00000000000..b0eb417ced5 --- /dev/null +++ b/dataflow/spanner-io/src/main/java/com/example/dataflow/SpannerReadAll.java @@ -0,0 +1,90 @@ +/* + * Copyright 2018 Google Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package com.example.dataflow; + +import com.google.cloud.spanner.Struct; +import org.apache.beam.sdk.Pipeline; +import org.apache.beam.sdk.io.TextIO; +import org.apache.beam.sdk.io.gcp.spanner.ReadOperation; +import org.apache.beam.sdk.io.gcp.spanner.SpannerConfig; +import org.apache.beam.sdk.io.gcp.spanner.SpannerIO; +import org.apache.beam.sdk.options.Description; +import org.apache.beam.sdk.options.PipelineOptions; +import org.apache.beam.sdk.options.PipelineOptionsFactory; +import org.apache.beam.sdk.options.Validation; +import org.apache.beam.sdk.transforms.MapElements; +import org.apache.beam.sdk.transforms.SerializableFunction; +import org.apache.beam.sdk.transforms.Sum; +import org.apache.beam.sdk.transforms.ToString; +import org.apache.beam.sdk.values.PCollection; +import org.apache.beam.sdk.values.TypeDescriptor; + +/** + * This sample demonstrates how to read all data from the Cloud Spanner database. + */ +public class SpannerReadAll { + + public interface Options extends PipelineOptions { + + @Description("Spanner instance ID to query from") + @Validation.Required + String getInstanceId(); + + void setInstanceId(String value); + + @Description("Spanner database name to query from") + @Validation.Required + String getDatabaseId(); + + void setDatabaseId(String value); + + @Description("Output filename for records size") + @Validation.Required + String getOutput(); + + void setOutput(String value); + } + + public static void main(String[] args) { + Options options = PipelineOptionsFactory.fromArgs(args).withValidation().as(Options.class); + Pipeline p = Pipeline.create(options); + + SpannerConfig spannerConfig = SpannerConfig.create() + .withInstanceId(options.getInstanceId()) + .withDatabaseId(options.getDatabaseId()); + // [START spanner_dataflow_readall] + PCollection allRecords = p.apply(SpannerIO.read() + .withSpannerConfig(spannerConfig) + .withQuery("SELECT t.table_name FROM information_schema.tables AS t WHERE t" + + ".table_catalog = '' 
AND t.table_schema = ''")).apply( + MapElements.into(TypeDescriptor.of(ReadOperation.class)) + .via((SerializableFunction) input -> { + String tableName = input.getString(0); + return ReadOperation.create().withQuery("SELECT * FROM " + tableName); + })).apply(SpannerIO.readAll().withSpannerConfig(spannerConfig)); + // [END spanner_dataflow_readall] + + PCollection dbEstimatedSize = allRecords.apply(EstimateSize.create()) + .apply(Sum.longsGlobally()); + + dbEstimatedSize.apply(ToString.elements()).apply(TextIO.write().to(options.getOutput()) + .withoutSharding()); + + p.run().waitUntilFinish(); + } + +} diff --git a/dataflow/spanner-io/src/main/java/com/example/dataflow/SpannerWrite.java b/dataflow/spanner-io/src/main/java/com/example/dataflow/SpannerWrite.java index 67502c05fdc..09e9b3e301a 100644 --- a/dataflow/spanner-io/src/main/java/com/example/dataflow/SpannerWrite.java +++ b/dataflow/spanner-io/src/main/java/com/example/dataflow/SpannerWrite.java @@ -22,13 +22,13 @@ import org.apache.beam.sdk.coders.DefaultCoder; import org.apache.beam.sdk.io.TextIO; import org.apache.beam.sdk.io.gcp.spanner.SpannerIO; -import org.apache.beam.sdk.options.Default; import org.apache.beam.sdk.options.Description; import org.apache.beam.sdk.options.PipelineOptions; import org.apache.beam.sdk.options.PipelineOptionsFactory; import org.apache.beam.sdk.options.Validation; import org.apache.beam.sdk.transforms.DoFn; import org.apache.beam.sdk.transforms.ParDo; +import org.apache.beam.sdk.values.PCollection; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -61,9 +61,7 @@ mvn exec:java \ -Dexec.mainClass=com.example.dataflow.SpannerWrite \ -Dexec.args="--instanceId=my-instance-id \ - --databaseId=my-database-id \ - --singersTable=my_singers_table \ - --albumsTable=my_albums_table" + --databaseId=my-database-id" */ public class SpannerWrite { @@ -73,13 +71,11 @@ public class SpannerWrite { public interface Options extends PipelineOptions { @Description("Singers filename in
the format: singer_id\tfirst_name\tlast_name") - @Default.String("data/singers.txt") String getSingersFilename(); void setSingersFilename(String value); @Description("Albums filename in the format: singer_id\talbum_id\talbum_title") - @Default.String("data/albums.txt") String getAlbumsFilename(); void setAlbumsFilename(String value); @@ -95,18 +91,6 @@ public interface Options extends PipelineOptions { String getDatabaseId(); void setDatabaseId(String value); - - @Description("Spanner singers table name to write to") - @Validation.Required - String getSingersTable(); - - void setSingersTable(String value); - - @Description("Spanner albums table name to write to") - @Validation.Required - String getAlbumsTable(); - - void setAlbumsTable(String value); } @DefaultCoder(AvroCoder.class) @@ -187,8 +171,6 @@ public static void main(String[] args) { String instanceId = options.getInstanceId(); String databaseId = options.getDatabaseId(); - String singersTable = options.getSingersTable(); - String albumsTable = options.getAlbumsTable(); // Read singers from a tab-delimited file p.apply("ReadSingers", TextIO.read().from(options.getSingersFilename())) @@ -199,7 +181,7 @@ public static void main(String[] args) { @ProcessElement public void processElement(ProcessContext c) { Singer singer = c.element(); - c.output(Mutation.newInsertOrUpdateBuilder(singersTable) + c.output(Mutation.newInsertOrUpdateBuilder("singers") .set("singerId").to(singer.singerId) .set("firstName").to(singer.firstName) .set("lastName").to(singer.lastName) @@ -212,25 +194,30 @@ public void processElement(ProcessContext c) { .withDatabaseId(databaseId)); // Read albums from a tab-delimited file - p.apply("ReadAlbums", TextIO.read().from(options.getAlbumsFilename())) + PCollection albums = p + .apply("ReadAlbums", TextIO.read().from(options.getAlbumsFilename())) // Parse the tab-delimited lines into Album objects - .apply("ParseAlbums", ParDo.of(new ParseAlbum())) + .apply("ParseAlbums", ParDo.of(new 
ParseAlbum())); + + // [START spanner_dataflow_write] + albums // Spanner expects a Mutation object, so create it using the Album's data .apply("CreateAlbumMutation", ParDo.of(new DoFn() { @ProcessElement public void processElement(ProcessContext c) { Album album = c.element(); - c.output(Mutation.newInsertOrUpdateBuilder(albumsTable) + c.output(Mutation.newInsertOrUpdateBuilder("albums") .set("singerId").to(album.singerId) .set("albumId").to(album.albumId) .set("albumTitle").to(album.albumTitle) .build()); } })) - // Finally write the Mutations to Spanner + // Write mutations to Spanner .apply("WriteAlbums", SpannerIO.write() .withInstanceId(instanceId) .withDatabaseId(databaseId)); + // [END spanner_dataflow_write] p.run().waitUntilFinish(); } diff --git a/dataflow/spanner-io/src/main/java/com/example/dataflow/TransactionalRead.java b/dataflow/spanner-io/src/main/java/com/example/dataflow/TransactionalRead.java new file mode 100644 index 00000000000..155a9e51a13 --- /dev/null +++ b/dataflow/spanner-io/src/main/java/com/example/dataflow/TransactionalRead.java @@ -0,0 +1,146 @@ +/* + * Copyright 2018 Google Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package com.example.dataflow; + +import com.google.cloud.spanner.Struct; +import com.google.cloud.spanner.TimestampBound; +import com.google.common.base.Joiner; +import org.apache.beam.sdk.Pipeline; +import org.apache.beam.sdk.coders.AvroCoder; +import org.apache.beam.sdk.coders.DefaultCoder; +import org.apache.beam.sdk.io.TextIO; +import org.apache.beam.sdk.io.gcp.spanner.SpannerConfig; +import org.apache.beam.sdk.io.gcp.spanner.SpannerIO; +import org.apache.beam.sdk.io.gcp.spanner.Transaction; +import org.apache.beam.sdk.options.Description; +import org.apache.beam.sdk.options.PipelineOptions; +import org.apache.beam.sdk.options.PipelineOptionsFactory; +import org.apache.beam.sdk.options.Validation; +import org.apache.beam.sdk.transforms.MapElements; +import org.apache.beam.sdk.transforms.SimpleFunction; +import org.apache.beam.sdk.values.PCollection; +import org.apache.beam.sdk.values.PCollectionView; + +public class TransactionalRead { + + private static final String DELIMITER = "\t"; + + public interface Options extends PipelineOptions { + + @Description("Spanner instance ID to write to") + @Validation.Required + String getInstanceId(); + + void setInstanceId(String value); + + @Description("Spanner database name to write to") + @Validation.Required + String getDatabaseId(); + + void setDatabaseId(String value); + + @Description("Singers output filename in the format: singer_id\tfirst_name\tlast_name") + String getSingersFilename(); + + void setSingersFilename(String value); + + @Description("Albums output filename in the format: singer_id\talbum_id\talbum_title") + String getAlbumsFilename(); + + void setAlbumsFilename(String value); + + } + + @DefaultCoder(AvroCoder.class) + static class Singer { + + long singerId; + String firstName; + String lastName; + + Singer() { + } + + Singer(long singerId, String firstName, String lastName) { + this.singerId = singerId; + this.firstName = firstName; + this.lastName = lastName; + } + } + + 
@DefaultCoder(AvroCoder.class) + static class Album { + + long singerId; + long albumId; + String albumTitle; + + Album() { + } + + Album(long singerId, long albumId, String albumTitle) { + this.singerId = singerId; + this.albumId = albumId; + this.albumTitle = albumTitle; + } + } + + public static void main(String[] args) { + Options options = PipelineOptionsFactory.fromArgs(args).withValidation().as(Options.class); + Pipeline p = Pipeline.create(options); + + String instanceId = options.getInstanceId(); + String databaseId = options.getDatabaseId(); + + // [START spanner_dataflow_txread] + SpannerConfig spannerConfig = SpannerConfig.create() + .withInstanceId(instanceId) + .withDatabaseId(databaseId); + PCollectionView tx = p.apply( + SpannerIO.createTransaction() + .withSpannerConfig(spannerConfig) + .withTimestampBound(TimestampBound.strong())); + PCollection singers = p.apply(SpannerIO.read() + .withSpannerConfig(spannerConfig) + .withQuery("SELECT SingerID, FirstName, LastName FROM Singers") + .withTransaction(tx)); + PCollection albums = p.apply(SpannerIO.read().withSpannerConfig(spannerConfig) + .withQuery("SELECT SingerId, AlbumId, AlbumTitle FROM Albums") + .withTransaction(tx)); + // [END spanner_dataflow_txread] + + singers.apply(MapElements.via(new SimpleFunction() { + + @Override + public String apply(Struct input) { + return Joiner.on(DELIMITER).join(input.getLong(0), input.getString(1), input.getString(2)); + } + })).apply(TextIO.write().to(options.getSingersFilename()).withoutSharding()); + + albums.apply(MapElements.via(new SimpleFunction() { + + @Override + public String apply(Struct input) { + return Joiner.on(DELIMITER).join(input.getLong(0), input.getLong(1), input.getString(2)); + } + })).apply(TextIO.write().to(options.getAlbumsFilename()).withoutSharding()); + + p.run().waitUntilFinish(); + + } + +} diff --git a/dataflow/spanner-io/src/test/java/com/example/dataflow/SpannerGroupWriteIT.java 
b/dataflow/spanner-io/src/test/java/com/example/dataflow/SpannerGroupWriteIT.java new file mode 100644 index 00000000000..c35773fd8a5 --- /dev/null +++ b/dataflow/spanner-io/src/test/java/com/example/dataflow/SpannerGroupWriteIT.java @@ -0,0 +1,149 @@ +/* + * Copyright 2018 Google Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package com.example.dataflow; + +import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertTrue; + +import com.google.cloud.spanner.Database; +import com.google.cloud.spanner.DatabaseAdminClient; +import com.google.cloud.spanner.DatabaseClient; +import com.google.cloud.spanner.DatabaseId; +import com.google.cloud.spanner.Mutation; +import com.google.cloud.spanner.Operation; +import com.google.cloud.spanner.ReadContext; +import com.google.cloud.spanner.ResultSet; +import com.google.cloud.spanner.Spanner; +import com.google.cloud.spanner.SpannerException; +import com.google.cloud.spanner.SpannerOptions; +import com.google.cloud.spanner.Statement; +import com.google.cloud.spanner.TransactionContext; +import com.google.cloud.spanner.TransactionRunner; +import com.google.spanner.admin.database.v1.CreateDatabaseMetadata; +import java.nio.file.Files; +import java.nio.file.Path; +import java.util.ArrayList; +import java.util.Arrays; +import java.util.List; +import java.util.stream.Collectors; +import java.util.stream.IntStream; +import javax.annotation.Nullable; +import org.junit.After; +import 
org.junit.Before; +import org.junit.Test; + +@SuppressWarnings("checkstyle:abbreviationaswordinname") +public class SpannerGroupWriteIT { + + String instanceId; + String databaseId; + + Path tempPath; + Spanner spanner; + SpannerOptions spannerOptions; + + @Before + public void setUp() throws Exception { + instanceId = System.getProperty("spanner.test.instance"); + databaseId = "df-spanner-groupwrite-it"; + + spannerOptions = SpannerOptions.getDefaultInstance(); + spanner = spannerOptions.getService(); + + DatabaseAdminClient adminClient = spanner.getDatabaseAdminClient(); + + try { + adminClient.dropDatabase(instanceId, databaseId); + } catch (SpannerException e) { + // Does not exist, ignore. + } + + Operation op = adminClient + .createDatabase(instanceId, databaseId, Arrays.asList("CREATE TABLE users (" + + "id STRING(MAX) NOT NULL, state STRING(MAX) NOT NULL) PRIMARY KEY (id)", + "CREATE TABLE PendingReviews (id INT64, action STRING(MAX), " + + "note STRING(MAX), userId STRING(MAX),) PRIMARY KEY (id)")); + + op.waitFor(); + + DatabaseClient dbClient = getDbClient(); + + List mutations = new ArrayList<>(); + for (int i = 0; i < 20; i++) { + mutations.add( + Mutation.newInsertBuilder("users").set("id").to(Integer.toString(i)).set("state") + .to("ACTIVE").build()); + } + TransactionRunner runner = dbClient.readWriteTransaction(); + runner.run(new TransactionRunner.TransactionCallable() { + + @Nullable + @Override + public Void run(TransactionContext tx) throws Exception { + tx.buffer(mutations); + return null; + } + }); + + String content = IntStream.range(0, 10).mapToObj(Integer::toString) + .collect(Collectors.joining("\n")); + tempPath = Files.createTempFile("suspicious-ids", "txt"); + Files.write(tempPath, content.getBytes()); + } + + @After + public void tearDown() throws Exception { + DatabaseAdminClient adminClient = spanner.getDatabaseAdminClient(); + try { + adminClient.dropDatabase(instanceId, databaseId); + } catch (SpannerException e) { + // Failed to 
cleanup. + } + + spanner.close(); + } + + @Test + public void testEndToEnd() throws Exception { + SpannerGroupWrite.main( + new String[] { "--instanceId=" + instanceId, "--databaseId=" + databaseId, + "--suspiciousUsersFile=" + tempPath, "--runner=DirectRunner" }); + + DatabaseClient dbClient = getDbClient(); + try (ReadContext context = dbClient.singleUse()) { + ResultSet rs = context.executeQuery( + Statement.newBuilder("SELECT COUNT(*) FROM users WHERE STATE = @state").bind("state") + .to("BLOCKED").build()); + assertTrue(rs.next()); + assertEquals(10, rs.getLong(0)); + + } + try (ReadContext context = dbClient.singleUse()) { + ResultSet rs = context.executeQuery( + Statement.newBuilder("SELECT COUNT(*) FROM PendingReviews WHERE ACTION = @action") + .bind("action").to("REVIEW ACCOUNT").build()); + assertTrue(rs.next()); + assertEquals(10, rs.getLong(0)); + } + } + + private DatabaseClient getDbClient() { + return spanner + .getDatabaseClient(DatabaseId.of(spannerOptions.getProjectId(), instanceId, databaseId)); + } + +} diff --git a/dataflow/spanner-io/src/test/java/com/example/dataflow/SpannerReadIT.java b/dataflow/spanner-io/src/test/java/com/example/dataflow/SpannerReadIT.java new file mode 100644 index 00000000000..621c00b2018 --- /dev/null +++ b/dataflow/spanner-io/src/test/java/com/example/dataflow/SpannerReadIT.java @@ -0,0 +1,177 @@ +/* + * Copyright 2018 Google Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package com.example.dataflow; + +import static org.junit.Assert.assertEquals; + +import com.google.cloud.spanner.Database; +import com.google.cloud.spanner.DatabaseAdminClient; +import com.google.cloud.spanner.DatabaseClient; +import com.google.cloud.spanner.DatabaseId; +import com.google.cloud.spanner.Mutation; +import com.google.cloud.spanner.Operation; +import com.google.cloud.spanner.Spanner; +import com.google.cloud.spanner.SpannerException; +import com.google.cloud.spanner.SpannerOptions; +import com.google.cloud.spanner.TransactionContext; +import com.google.cloud.spanner.TransactionRunner; +import com.google.spanner.admin.database.v1.CreateDatabaseMetadata; +import java.nio.file.Files; +import java.nio.file.Path; +import java.util.Arrays; +import java.util.List; +import java.util.stream.Collectors; +import javax.annotation.Nullable; +import org.junit.After; +import org.junit.Before; +import org.junit.Test; + +@SuppressWarnings("checkstyle:abbreviationaswordinname") +public class SpannerReadIT { + + String instanceId; + String databaseId; + + Spanner spanner; + SpannerOptions spannerOptions; + + @Before + public void setUp() throws Exception { + instanceId = System.getProperty("spanner.test.instance"); + databaseId = "df-spanner-read-it"; + + spannerOptions = SpannerOptions.getDefaultInstance(); + spanner = spannerOptions.getService(); + + DatabaseAdminClient adminClient = spanner.getDatabaseAdminClient(); + + try { + adminClient.dropDatabase(instanceId, databaseId); + } catch (SpannerException e) { + // Does not exist, ignore. 
+ } + + Operation op = adminClient + .createDatabase(instanceId, databaseId, Arrays.asList("CREATE TABLE Singers " + + "(singerId INT64 NOT NULL, firstName STRING(MAX) NOT NULL, " + + "lastName STRING(MAX) NOT NULL,) PRIMARY KEY (singerId)", + "CREATE TABLE Albums (singerId INT64 NOT NULL, albumId INT64 NOT NULL, " + + "albumTitle STRING(MAX) NOT NULL,) PRIMARY KEY (singerId, albumId)")); + + op.waitFor(); + + List mutations = Arrays.asList( + Mutation.newInsertBuilder("singers") + .set("singerId").to(1L) + .set("firstName").to("John") + .set("lastName").to("Lennon") + .build(), + Mutation.newInsertBuilder("singers") + .set("singerId").to(2L) + .set("firstName").to("Paul") + .set("lastName").to("Mccartney") + .build(), + Mutation.newInsertBuilder("singers") + .set("singerId").to(3L) + .set("firstName").to("George") + .set("lastName").to("Harrison") + .build(), + Mutation.newInsertBuilder("singers") + .set("singerId").to(4L) + .set("firstName").to("Ringo") + .set("lastName").to("Starr") + .build(), + + Mutation.newInsertBuilder("albums") + .set("singerId").to(1L) + .set("albumId").to(1L) + .set("albumTitle").to("Imagine") + .build(), + Mutation.newInsertBuilder("albums") + .set("singerId").to(2L) + .set("albumId").to(1L) + .set("albumTitle").to("Pipes of Peace") + .build() + ); + + + DatabaseClient dbClient = getDbClient(); + + TransactionRunner runner = dbClient.readWriteTransaction(); + runner.run(new TransactionRunner.TransactionCallable() { + @Nullable + @Override + public Void run(TransactionContext tx) throws Exception { + tx.buffer(mutations); + return null; + } + }); + } + + @After + public void tearDown() throws Exception { + DatabaseAdminClient adminClient = spanner.getDatabaseAdminClient(); + try { + adminClient.dropDatabase(instanceId, databaseId); + } catch (SpannerException e) { + // Failed to cleanup. 
+ } + + spanner.close(); + } + + @Test + public void readDbEndToEnd() throws Exception { + Path outPath = Files.createTempFile("out", "txt"); + SpannerReadAll.main(new String[] { "--instanceId=" + instanceId, "--databaseId=" + databaseId, + "--output=" + outPath, "--runner=DirectRunner" }); + + String content = Files.readAllLines(outPath).stream().collect(Collectors.joining("\n")); + + assertEquals("132", content); + } + + @Test + public void readTableEndToEnd() throws Exception { + Path outPath = Files.createTempFile("out", "txt"); + SpannerRead.main(new String[] { "--instanceId=" + instanceId, "--databaseId=" + databaseId, + "--output=" + outPath, "--table=albums", "--runner=DirectRunner" }); + + String content = Files.readAllLines(outPath).stream().collect(Collectors.joining("\n")); + + assertEquals("53", content); + } + + @Test + public void reaTransactionalReadEndToEnd() throws Exception { + Path singersPath = Files.createTempFile("singers", "txt"); + Path albumsPath = Files.createTempFile("albums", "txt"); + TransactionalRead.main( + new String[] { "--instanceId=" + instanceId, "--databaseId=" + databaseId, + "--singersFilename=" + singersPath, "--albumsFilename=" + albumsPath, + "--runner=DirectRunner" }); + + assertEquals(4, Files.readAllLines(singersPath).size()); + assertEquals(2, Files.readAllLines(albumsPath).size()); + } + + private DatabaseClient getDbClient() { + return spanner + .getDatabaseClient(DatabaseId.of(spannerOptions.getProjectId(), instanceId, databaseId)); + } + +} \ No newline at end of file diff --git a/dataflow/spanner-io/src/test/java/com/example/dataflow/SpannerWriteIT.java b/dataflow/spanner-io/src/test/java/com/example/dataflow/SpannerWriteIT.java new file mode 100644 index 00000000000..5631ddcbd46 --- /dev/null +++ b/dataflow/spanner-io/src/test/java/com/example/dataflow/SpannerWriteIT.java @@ -0,0 +1,135 @@ +/* + * Copyright 2018 Google Inc. 
+ * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package com.example.dataflow; + +import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertTrue; + +import com.google.cloud.spanner.Database; +import com.google.cloud.spanner.DatabaseAdminClient; +import com.google.cloud.spanner.DatabaseClient; +import com.google.cloud.spanner.DatabaseId; +import com.google.cloud.spanner.Operation; +import com.google.cloud.spanner.ReadContext; +import com.google.cloud.spanner.ResultSet; +import com.google.cloud.spanner.Spanner; +import com.google.cloud.spanner.SpannerException; +import com.google.cloud.spanner.SpannerOptions; +import com.google.cloud.spanner.Statement; +import com.google.spanner.admin.database.v1.CreateDatabaseMetadata; +import java.nio.file.Files; +import java.nio.file.Path; +import java.util.Arrays; +import java.util.stream.Collectors; +import java.util.stream.Stream; +import org.junit.After; +import org.junit.Before; +import org.junit.Test; + +@SuppressWarnings("checkstyle:abbreviationaswordinname") +public class SpannerWriteIT { + + String instanceId; + String databaseId; + + Path singersPath; + Path albumsPath; + Spanner spanner; + SpannerOptions spannerOptions; + + @Before + public void setUp() throws Exception { + + instanceId = System.getProperty("spanner.test.instance"); + databaseId = "df-spanner-write-it"; + + spannerOptions = SpannerOptions.getDefaultInstance(); + spanner = spannerOptions.getService(); + + 
DatabaseAdminClient adminClient = spanner.getDatabaseAdminClient(); + + try { + adminClient.dropDatabase(instanceId, databaseId); + } catch (SpannerException e) { + // Does not exist, ignore. + } + + Operation op = adminClient + .createDatabase(instanceId, databaseId, Arrays.asList("CREATE TABLE Singers " + + "(singerId INT64 NOT NULL, " + + "firstName STRING(MAX) NOT NULL, lastName STRING(MAX) NOT NULL,) " + + "PRIMARY KEY (singerId)", + "CREATE TABLE Albums (singerId INT64 NOT NULL, " + + "albumId INT64 NOT NULL, albumTitle STRING(MAX) NOT NULL,) " + + "PRIMARY KEY (singerId, albumId)")); + + op.waitFor(); + + String singers = Stream + .of("1\tJohn\tLennon", "2\tPaul\tMccartney", "3\tGeorge\tHarrison", "4\tRingo\tStarr") + .collect(Collectors.joining("\n")); + singersPath = Files.createTempFile("singers", "txt"); + Files.write(singersPath, singers.getBytes()); + + String albums = Stream + .of("1\t1\tImagine", "2\t1\tPipes of Peace", "3\t1\tDark Horse") + .collect(Collectors.joining("\n")); + albumsPath = Files.createTempFile("albums", "txt"); + Files.write(albumsPath, albums.getBytes()); + + + } + + @After + public void tearDown() throws Exception { + DatabaseAdminClient adminClient = spanner.getDatabaseAdminClient(); + try { + adminClient.dropDatabase(instanceId, databaseId); + } catch (SpannerException e) { + // Failed to cleanup. 
+ } + + spanner.close(); + } + + @Test + public void testEndToEnd() throws Exception { + SpannerWrite.main(new String[] { "--instanceId=" + instanceId, "--databaseId=" + databaseId, + "--singersFilename=" + singersPath, "--albumsFilename=" + albumsPath, + "--runner=DirectRunner" }); + + DatabaseClient dbClient = getDbClient(); + try (ReadContext context = dbClient.singleUse()) { + ResultSet rs = context.executeQuery( + Statement.of("SELECT COUNT(*) FROM singers")); + assertTrue(rs.next()); + assertEquals(4, rs.getLong(0)); + + } + try (ReadContext context = dbClient.singleUse()) { + ResultSet rs = context.executeQuery(Statement.of("SELECT COUNT(*) FROM albums")); + assertTrue(rs.next()); + assertEquals(3, rs.getLong(0)); + } + } + + private DatabaseClient getDbClient() { + return spanner + .getDatabaseClient(DatabaseId.of(spannerOptions.getProjectId(), instanceId, databaseId)); + } + +}