Skip to content

Commit

Permalink
feat: added new Dataplex APIs and new features for existing APIs (e.g…
Browse files Browse the repository at this point in the history
…. DataScans)

docs: updated comments for multiple Dataplex APIs

PiperOrigin-RevId: 528906555
  • Loading branch information
Google APIs authored and copybara-github committed May 2, 2023
1 parent 3e316f1 commit b7429bc
Show file tree
Hide file tree
Showing 9 changed files with 244 additions and 151 deletions.
5 changes: 5 additions & 0 deletions google/cloud/dataplex/v1/analyze.proto
Original file line number Diff line number Diff line change
Expand Up @@ -90,6 +90,7 @@ message Environment {
}
}

// Configuration for sessions created for this environment.
message SessionSpec {
// Optional. The idle time configuration of the session. The session will be
// auto-terminated at the end of this period.
Expand All @@ -104,12 +105,14 @@ message Environment {
bool enable_fast_startup = 2 [(google.api.field_behavior) = OPTIONAL];
}

// Status of sessions created for this environment.
message SessionStatus {
// Output only. Queries over sessions to mark whether the environment is
// currently active or not.
bool active = 1 [(google.api.field_behavior) = OUTPUT_ONLY];
}

// URI Endpoints to access sessions associated with the Environment.
message Endpoints {
// Output only. URI to serve notebook APIs
string notebooks = 1 [(google.api.field_behavior) = OUTPUT_ONLY];
Expand Down Expand Up @@ -244,6 +247,7 @@ message Content {
string data_text = 9 [(google.api.field_behavior) = REQUIRED];
}

// Types of content
oneof content {
// Sql Script related configurations.
SqlScript sql_script = 100;
Expand Down Expand Up @@ -276,5 +280,6 @@ message Session {
google.protobuf.Timestamp create_time = 3
[(google.api.field_behavior) = OUTPUT_ONLY];

// Output only. State of Session
State state = 4 [(google.api.field_behavior) = OUTPUT_ONLY];
}
156 changes: 67 additions & 89 deletions google/cloud/dataplex/v1/data_profile.proto
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@ syntax = "proto3";

package google.cloud.dataplex.v1;

import "google/api/field_behavior.proto";
import "google/cloud/dataplex/v1/processing.proto";

option go_package = "cloud.google.com/go/dataplex/apiv1/dataplexpb;dataplexpb";
Expand All @@ -26,182 +27,159 @@ option java_package = "com.google.cloud.dataplex.v1";
// DataProfileScan related setting.
message DataProfileSpec {}

// DataProfileResult defines the output of DataProfileScan.
// Each field of the table will have field type specific profile result.
// DataProfileResult defines the output of DataProfileScan. Each field of the
// table will have field type specific profile result.
message DataProfileResult {
// Profile information describing the structure and layout of the data
// and contains the profile info.
// Contains name, type, mode and field type specific profile information.
message Profile {
// Represents a column field within a table schema.
// A field within a table.
message Field {
// ProfileInfo defines the profile information for each schema field type.
// The profile information for each field type.
message ProfileInfo {
// StringFieldInfo defines output info for any string type field.
// The profile information for a string type field.
message StringFieldInfo {
// The minimum length of the string field in the sampled data.
// Optional if zero non-null rows.
// Minimum length of non-null values in the scanned data.
int64 min_length = 1;

// The maximum length of a string field in the sampled data.
// Optional if zero non-null rows.
// Maximum length of non-null values in the scanned data.
int64 max_length = 2;

// The average length of a string field in the sampled data.
// Optional if zero non-null rows.
// Average length of non-null values in the scanned data.
double average_length = 3;
}

// IntegerFieldInfo defines output for any integer type field.
// The profile information for an integer type field.
message IntegerFieldInfo {
// The average of non-null values of integer field in the sampled
// data. Return NaN, if the field has a NaN. Optional if zero non-null
// rows.
// Average of non-null values in the scanned data. NaN, if the field
// has a NaN.
double average = 1;

// The standard deviation of non-null of integer field in the sampled
// data. Return NaN, if the field has a NaN. Optional if zero non-null
// rows.
// Standard deviation of non-null values in the scanned data. NaN, if
// the field has a NaN.
double standard_deviation = 3;

// The minimum value of an integer field in the sampled data.
// Return NaN, if the field has a NaN. Optional if zero non-null
// rows.
// Minimum of non-null values in the scanned data. NaN, if the field
// has a NaN.
int64 min = 4;

// A quartile divide the number of data points into four parts, or
// A quartile divides the number of data points into four parts, or
// quarters, of more-or-less equal size. Three main quartiles used
// are: The first quartile (Q1) splits off the lowest 25% of data from
// the highest 75%. It is also known as the lower or 25th empirical
// quartile, as 25% of the data is below this point. The second
// quartile (Q2) is the median of a data set. So, 50% of the data lies
// below this point. The third quartile (Q3) splits off the highest
// 25% of data from the lowest 75%. It is known as the upper or 75th
// empirical quartile, as 75% of the data lies below this point. So,
// here the quartiles is provided as an ordered list of quartile
// values, occurring in order Q1, median, Q3.
// empirical quartile, as 75% of the data lies below this point.
// Here, the quartiles are provided as an ordered list of quartile
// values for the scanned data, occurring in order Q1, median, Q3.
repeated int64 quartiles = 6;

// The maximum value of an integer field in the sampled data.
// Return NaN, if the field has a NaN. Optional if zero non-null
// rows.
// Maximum of non-null values in the scanned data. NaN, if the field
// has a NaN.
int64 max = 5;
}

// DoubleFieldInfo defines output for any double type field.
// The profile information for a double type field.
message DoubleFieldInfo {
// The average of non-null values of double field in the sampled data.
// Return NaN, if the field has a NaN. Optional if zero non-null rows.
// Average of non-null values in the scanned data. NaN, if the field
// has a NaN.
double average = 1;

// The standard deviation of non-null of double field in the sampled
// data. Return NaN, if the field has a NaN. Optional if zero non-null
// rows.
// Standard deviation of non-null values in the scanned data. NaN, if
// the field has a NaN.
double standard_deviation = 3;

// The minimum value of a double field in the sampled data.
// Return NaN, if the field has a NaN. Optional if zero non-null
// rows.
// Minimum of non-null values in the scanned data. NaN, if the field
// has a NaN.
double min = 4;

// A quartile divide the numebr of data points into four parts, or
// A quartile divides the number of data points into four parts, or
// quarters, of more-or-less equal size. Three main quartiles used
// are: The first quartile (Q1) splits off the lowest 25% of data from
// the highest 75%. It is also known as the lower or 25th empirical
// quartile, as 25% of the data is below this point. The second
// quartile (Q2) is the median of a data set. So, 50% of the data lies
// below this point. The third quartile (Q3) splits off the highest
// 25% of data from the lowest 75%. It is known as the upper or 75th
// empirical quartile, as 75% of the data lies below this point. So,
// here the quartiles is provided as an ordered list of quartile
// values, occurring in order Q1, median, Q3.
// empirical quartile, as 75% of the data lies below this point.
// Here, the quartiles are provided as an ordered list of quartile
// values for the scanned data, occurring in order Q1, median, Q3.
repeated double quartiles = 6;

// The maximum value of a double field in the sampled data.
// Return NaN, if the field has a NaN. Optional if zero non-null
// rows.
// Maximum of non-null values in the scanned data. NaN, if the field
// has a NaN.
double max = 5;
}

// The TopNValue defines the structure of output of top N values of a
// field.
// Top N non-null values in the scanned data.
message TopNValue {
// The value is the string value of the actual value from the field.
// String value of a top N non-null value.
string value = 1;

// The frequency count of the corresponding value in the field.
// Count of the corresponding value in the scanned data.
int64 count = 2;
}

// The ratio of null rows against the rows in the sampled data.
// Ratio of rows with null value against total scanned rows.
double null_ratio = 2;

// The ratio of rows that are distinct against the rows in the sampled
// data.
// Ratio of rows with distinct values against total scanned rows.
// Not available for complex non-groupable field type RECORD and fields
// with REPEATABLE mode.
double distinct_ratio = 3;

// The array of top N values of the field in the sampled data.
// Currently N is set as 10 or equal to distinct values in the field,
// whichever is smaller. This will be optional for complex non-groupable
// data-types such as JSON, ARRAY, JSON, STRUCT.
// The list of top N non-null values and number of times they occur in
// the scanned data. N is 10 or equal to the number of distinct values
// in the field, whichever is smaller. Not available for complex
// non-groupable field type RECORD and fields with REPEATABLE mode.
repeated TopNValue top_n_values = 4;

// The corresponding profile for specific field type.
// Each field will have only one field type specific profile output.
// Structural and profile information for specific field type. Not
// available, if mode is REPEATABLE.
oneof field_info {
// The corresponding string field profile.
// String type field information.
StringFieldInfo string_profile = 101;

// The corresponding integer field profile.
// Integer type field information.
IntegerFieldInfo integer_profile = 102;

// The corresponding double field profile.
// Double type field information.
DoubleFieldInfo double_profile = 103;
}
}

// The name of the field.
string name = 1;

// The field data type. Possible values include:
//
// * STRING
// * BYTE
// * INT64
// * INT32
// * INT16
// * DOUBLE
// * FLOAT
// * DECIMAL
// * BOOLEAN
// * BINARY
// * TIMESTAMP
// * DATE
// * TIME
// * NULL
// * RECORD
// The data type retrieved from the schema of the data source. For
// instance, for a BigQuery native table, it is the [BigQuery Table
// Schema](https://cloud.google.com/bigquery/docs/reference/rest/v2/tables#tablefieldschema).
// For a Dataplex Entity, it is the [Entity
// Schema](https://cloud.google.com/dataplex/docs/reference/rpc/google.cloud.dataplex.v1#type_3).
string type = 2;

// The mode of the field. Its value will be:
// REQUIRED, if it is a required field.
// NULLABLE, if it is an optional field.
// REPEATED, if it is a repeated field.
// The mode of the field. Possible values include:
//
// * REQUIRED, if it is a required field.
// * NULLABLE, if it is an optional field.
// * REPEATED, if it is a repeated field.
string mode = 3;

// The profile information for the corresponding field.
// Profile information for the corresponding field.
ProfileInfo profile = 4;
}

// The sequence of fields describing data in table entities.
// List of fields with structural and profile information for each field.
repeated Field fields = 2;
}

// The count of all rows in the sampled data.
// Return 0, if zero rows.
// The count of rows scanned.
int64 row_count = 3;

// This represents the profile information per field.
// The profile information per field.
Profile profile = 4;

// The data scanned for this profile.
// The data scanned for this result.
ScannedData scanned_data = 5;
}
Loading

0 comments on commit b7429bc

Please sign in to comment.