Skip to content
This repository has been archived by the owner on Sep 27, 2023. It is now read-only.

feat: new Bytes and File types: POWERPOINT and EXCEL #693

Merged
merged 2 commits into from
Mar 25, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
46 changes: 27 additions & 19 deletions protos/google/privacy/dlp/v2/dlp.proto
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
// Copyright 2021 Google LLC
// Copyright 2022 Google LLC
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
Expand All @@ -16,6 +16,7 @@ syntax = "proto3";

package google.privacy.dlp.v2;

import "google/api/annotations.proto";
import "google/api/client.proto";
import "google/api/field_behavior.proto";
import "google/api/resource.proto";
Expand All @@ -28,7 +29,6 @@ import "google/rpc/status.proto";
import "google/type/date.proto";
import "google/type/dayofweek.proto";
import "google/type/timeofday.proto";
import "google/api/annotations.proto";

option csharp_namespace = "Google.Cloud.Dlp.V2";
option go_package = "google.golang.org/genproto/googleapis/privacy/dlp/v2;dlp";
Expand Down Expand Up @@ -719,8 +719,8 @@ message InspectionRuleSet {
// When used with redactContent only info_types and min_likelihood are currently
// used.
message InspectConfig {
// Configuration to control the number of findings returned. Cannot be set if
// de-identification is requested.
// Configuration to control the number of findings returned for inspection.
// This is not used for de-identification or data profiling.
message FindingLimits {
// Max findings configuration per infoType, per content item or long
// running DlpJob.
Expand Down Expand Up @@ -769,21 +769,23 @@ message InspectConfig {
Likelihood min_likelihood = 2;

// Configuration to control the number of findings returned.
// This is not used for data profiling.
FindingLimits limits = 3;

// When true, a contextual quote from the data that triggered a finding is
// included in the response; see Finding.quote.
// This is not used for data profiling.
bool include_quote = 4;

// When true, excludes type information of the findings.
// This is not used for data profiling.
bool exclude_info_types = 5;

// CustomInfoTypes provided by the user. See
// https://cloud.google.com/dlp/docs/creating-custom-infotypes to learn more.
repeated CustomInfoType custom_info_types = 6;

// List of options defining data content to scan.
// If empty, text, images, and other content will be included.
// Deprecated and unused.
repeated ContentOption content_options = 8;

// Set of rules to apply to the findings for this InspectConfig.
Expand Down Expand Up @@ -825,6 +827,12 @@ message ByteContentItem {
// pdf
PDF = 8;

// pptx, pptm, potx, potm, pot
POWERPOINT_DOCUMENT = 9;

// xlsx, xlsm, xltx, xltm
EXCEL_DOCUMENT = 10;

// avro
AVRO = 11;

Expand Down Expand Up @@ -2857,6 +2865,18 @@ message TransformationOverview {
// Only one of 'transformation', 'field_transformation', or 'record_suppress'
// will be set.
message TransformationSummary {
// Possible outcomes of transformations. `SummaryResult` reports how many
// times each of these codes occurred.
enum TransformationResultCode {
// Unused default value (enum number 0).
TRANSFORMATION_RESULT_CODE_UNSPECIFIED = 0;

// Transformation completed without an error.
SUCCESS = 1;

// Transformation had an error.
ERROR = 2;
}

// A collection that informs the user the number of times a particular
// `TransformationResultCode` and error details occurred.
message SummaryResult {
Expand All @@ -2871,18 +2891,6 @@ message TransformationSummary {
string details = 3;
}

// Possible outcomes of transformations. `SummaryResult` reports how many
// times each of these codes occurred.
enum TransformationResultCode {
// Unused default value (enum number 0).
TRANSFORMATION_RESULT_CODE_UNSPECIFIED = 0;

// Transformation completed without an error.
SUCCESS = 1;

// Transformation had an error.
ERROR = 2;
}

// Set if the transformation was limited to a specific InfoType.
InfoType info_type = 1;

Expand Down Expand Up @@ -4352,7 +4360,7 @@ enum MatchingType {
MATCHING_TYPE_INVERSE_MATCH = 3;
}

// Options describing which parts of the provided content should be scanned.
// Deprecated and unused.
enum ContentOption {
// Includes entire content of a file or a data stream.
CONTENT_UNSPECIFIED = 0;
Expand Down
154 changes: 82 additions & 72 deletions protos/google/privacy/dlp/v2/storage.proto
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
// Copyright 2021 Google LLC
// Copyright 2022 Google LLC
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
Expand All @@ -16,7 +16,6 @@ syntax = "proto3";

package google.privacy.dlp.v2;

import "google/api/annotations.proto";
import "google/api/resource.proto";
import "google/protobuf/timestamp.proto";

Expand All @@ -41,18 +40,6 @@ message InfoType {
string version = 2;
}

// A reference to a StoredInfoType to use with scanning.
message StoredType {
// Resource name of the requested `StoredInfoType`. The name may be
// organization-scoped or project-scoped, for example
// `organizations/433245324/storedInfoTypes/432452342` or
// `projects/project-id/storedInfoTypes/432452342`.
string name = 1;

// Timestamp indicating when the version of the `StoredInfoType` used for
// inspection was created. Output only: populated by the system, not by the
// caller.
google.protobuf.Timestamp create_time = 2;
}

// Categorization of results based on how likely they are to represent a match,
// based on the number of elements they contain which imply a match.
enum Likelihood {
Expand All @@ -73,6 +60,18 @@ enum Likelihood {
VERY_LIKELY = 5;
}

// A reference to a StoredInfoType to use with scanning.
message StoredType {
// Resource name of the requested `StoredInfoType`. The name may be
// organization-scoped or project-scoped, for example
// `organizations/433245324/storedInfoTypes/432452342` or
// `projects/project-id/storedInfoTypes/432452342`.
string name = 1;

// Timestamp indicating when the version of the `StoredInfoType` used for
// inspection was created. Output only: populated by the system, not by the
// caller.
google.protobuf.Timestamp create_time = 2;
}

// Custom information type provided by the user. Used to find domain-specific
// sensitive information configurable to the data in question.
message CustomInfoType {
Expand All @@ -85,7 +84,7 @@ message CustomInfoType {
// Plane](https://en.wikipedia.org/wiki/Plane_%28Unicode%29#Basic_Multilingual_Plane)
// will be replaced with whitespace when scanning for matches, so the
// dictionary phrase "Sam Johnson" will match all three phrases "sam johnson",
// Plane](https://en.wikipedia.org/wiki/Plane_%28Unicode%29#Basic_Multilingual_Plane)
// "Sam, Johnson", and "Sam (Johnson)". Additionally, the characters
// surrounding any match must be of a different type than the adjacent
// characters within the word, so letters must be next to non-letters and
// digits next to non-digits. For example, the dictionary word "jen" will
Expand All @@ -98,7 +97,7 @@ message CustomInfoType {
// [limits](https://cloud.google.com/dlp/limits) page contains details about
// the size limits of dictionaries. For dictionaries that do not fit within
// these constraints, consider using `LargeCustomDictionaryConfig` in the
// [limits](https://cloud.google.com/dlp/limits) page contains details about
// `StoredInfoType` API.
message Dictionary {
// Message defining a list of words or phrases to search for in the data.
message WordList {
Expand All @@ -124,7 +123,7 @@ message CustomInfoType {
// (https://github.com/google/re2/wiki/Syntax) can be found under the
// google/re2 repository on GitHub.
string pattern = 1;
// (https://github.com/google/re2/wiki/Syntax) can be found under the

// The index of the submatch to extract as findings. When not
// specified, the entire match is returned. No more than 3 may be included.
repeated int32 group_indexes = 2;
Expand All @@ -135,10 +134,12 @@ message CustomInfoType {
// [`CryptoReplaceFfxFpeConfig`](https://cloud.google.com/dlp/docs/reference/rest/v2/organizations.deidentifyTemplates#cryptoreplaceffxfpeconfig).
// These types of transformations are
// those that perform pseudonymization, thereby producing a "surrogate" as
// [`CryptoReplaceFfxFpeConfig`](https://cloud.google.com/dlp/docs/reference/rest/v2/organizations.deidentifyTemplates#cryptoreplaceffxfpeconfig).
// output. This should be used in conjunction with a field on the
// transformation such as `surrogate_info_type`. This CustomInfoType does
// not support the use of `detection_rules`.
// This message declares no fields.
message SurrogateType {}
message SurrogateType {
// This message declares no fields.

}

// Deprecated; use `InspectionRuleSet` instead. Rule for modifying a
// `CustomInfoType` to alter behavior under certain circumstances, depending
Expand Down Expand Up @@ -284,6 +285,67 @@ message DatastoreOptions {
KindExpression kind = 2;
}

// Definitions of file type groups to scan. New types will be added to this
// list.
enum FileType {
// NOTE(review): enum numbers 4 and 10 are not assigned to any value in this
// enum. If they belonged to removed values they should be declared
// `reserved` so they cannot be reused — confirm against the enum's history.

// Includes all files.
FILE_TYPE_UNSPECIFIED = 0;

// Includes all file extensions not covered by another entry. Binary
// scanning attempts to convert the content of the file to UTF-8 to scan
// the file.
// If you wish to avoid this fallback, specify one or more of the other
// FileTypes in your storage scan.
BINARY_FILE = 1;

// Included file extensions:
// asc, asp, aspx, brf, c, cc, cfm, cgi, cpp, csv, cxx, c++, cs, css, dart,
// dat, dot, eml, epbub, ged, go, h, hh, hpp, hxx, h++, hs, html, htm,
// mkd, markdown, m, ml, mli, perl, pl, plist, pm, php, phtml, pht,
// properties, py, pyw, rb, rbw, rs, rss, rc, scala, sh, sql, swift, tex,
// shtml, shtm, xhtml, lhs, ics, ini, java, js, json, kix, kml, ocaml, md,
// txt, text, tsv, vb, vcard, vcs, wml, xcodeproj, xml, xsl, xsd, yml, yaml.
TEXT_FILE = 2;

// Included file extensions:
// bmp, gif, jpg, jpeg, jpe, png.
// bytes_limit_per_file has no effect on image files.
// Image inspection is restricted to 'global', 'us', 'asia', and 'europe'.
IMAGE = 3;

// Word files >30 MB will be scanned as binary files.
// Included file extensions:
// docx, dotx, docm, dotm
WORD = 5;

// PDF files >30 MB will be scanned as binary files.
// Included file extensions:
// pdf
PDF = 6;

// Included file extensions:
// avro
AVRO = 7;

// Included file extensions:
// csv
CSV = 8;

// Included file extensions:
// tsv
TSV = 9;

// PowerPoint files >30 MB will be scanned as binary files.
// Included file extensions:
// pptx, pptm, potx, potm, pot
POWERPOINT = 11;

// Excel files >30 MB will be scanned as binary files.
// Included file extensions:
// xlsx, xlsm, xltx, xltm
EXCEL = 12;
}

// Message representing a set of files in a Cloud Storage bucket. Regular
// expressions are used to allow fine-grained control over which files in the
// bucket to include.
Expand Down Expand Up @@ -330,7 +392,7 @@ message CloudStorageRegexFileSet {
// [syntax](https://github.com/google/re2/wiki/Syntax); a guide can be found
// under the google/re2 repository on GitHub.
repeated string include_regex = 2;
// [syntax](https://github.com/google/re2/wiki/Syntax); a guide can be found

// A list of regular expressions matching file paths to exclude. All files in
// the bucket that match at least one of these regular expressions will be
// excluded from the scan.
Expand All @@ -339,7 +401,6 @@ message CloudStorageRegexFileSet {
// [syntax](https://github.com/google/re2/wiki/Syntax); a guide can be found
// under the google/re2 repository on GitHub.
repeated string exclude_regex = 3;
// [syntax](https://github.com/google/re2/wiki/Syntax); a guide can be found
}

// Options defining a file or a set of files within a Google Cloud Storage
Expand Down Expand Up @@ -531,57 +592,6 @@ message StorageConfig {
TimespanConfig timespan_config = 6;
}

// Definitions of file type groups to scan. New types will be added to this
// list.
enum FileType {
// NOTE(review): enum number 4 is not assigned to any value in this enum.
// If it belonged to a removed value it should be declared `reserved` so it
// cannot be reused — confirm against the enum's history.

// Includes all files.
FILE_TYPE_UNSPECIFIED = 0;

// Includes all file extensions not covered by another entry. Binary
// scanning attempts to convert the content of the file to UTF-8 to scan
// the file.
// If you wish to avoid this fallback, specify one or more of the other
// FileTypes in your storage scan.
BINARY_FILE = 1;

// Included file extensions:
// asc, asp, aspx, brf, c, cc, cfm, cgi, cpp, csv, cxx, c++, cs, css, dart,
// dat, dot, eml, epbub, ged, go, h, hh, hpp, hxx, h++, hs, html, htm,
// mkd, markdown, m, ml, mli, perl, pl, plist, pm, php, phtml, pht,
// properties, py, pyw, rb, rbw, rs, rss, rc, scala, sh, sql, swift, tex,
// shtml, shtm, xhtml, lhs, ics, ini, java, js, json, kix, kml, ocaml, md,
// txt, text, tsv, vb, vcard, vcs, wml, xcodeproj, xml, xsl, xsd, yml, yaml.
TEXT_FILE = 2;

// Included file extensions:
// bmp, gif, jpg, jpeg, jpe, png.
// bytes_limit_per_file has no effect on image files.
// Image inspection is restricted to 'global', 'us', 'asia', and 'europe'.
IMAGE = 3;

// Word files >30 MB will be scanned as binary files.
// Included file extensions:
// docx, dotx, docm, dotm
WORD = 5;

// PDF files >30 MB will be scanned as binary files.
// Included file extensions:
// pdf
PDF = 6;

// Included file extensions:
// avro
AVRO = 7;

// Included file extensions:
// csv
CSV = 8;

// Included file extensions:
// tsv
TSV = 9;
}

// Configuration to control jobs where the content being inspected is outside
// of Google Cloud Platform.
message HybridOptions {
Expand Down
Loading