From f70784ec5bf9c133dc81602e527b0c948eed8977 Mon Sep 17 00:00:00 2001 From: analyzer1 Date: Mon, 16 Sep 2024 12:29:09 -0400 Subject: [PATCH] Feature/PADW-50 Tembo AI Integration (#7) Switching to the OpenAI API standard for LLM API calls. --- .../src/controller/bgw_transformer_client.rs | 46 +- extension/src/utility/guc.rs | 16 + extension/src/utility/mod.rs | 1 + extension/src/utility/ollama_client.rs | 4 + extension/src/utility/openai_client.rs | 446 ++++++++++++++++++ 5 files changed, 497 insertions(+), 16 deletions(-) create mode 100644 extension/src/utility/openai_client.rs diff --git a/extension/src/controller/bgw_transformer_client.rs b/extension/src/controller/bgw_transformer_client.rs index a9ccc03..80131fe 100644 --- a/extension/src/controller/bgw_transformer_client.rs +++ b/extension/src/controller/bgw_transformer_client.rs @@ -8,7 +8,8 @@ use serde::Deserialize; use crate::queries; use crate::model::source_objects; -use crate::utility::ollama_client; +// use crate::utility::ollama_client; +use crate::utility::openai_client; use crate::utility::guc; use regex::Regex; @@ -61,8 +62,6 @@ pub extern "C" fn background_worker_transformer_client(_arg: pg_sys::Datum) { let columns = extract_column_numbers(&table_details_json_str); - - // Identity BK Ordinal Location let mut generation_json_bk_identification: Option = None; let mut identified_business_key_opt: Option = None; @@ -71,22 +70,27 @@ pub extern "C" fn background_worker_transformer_client(_arg: pg_sys::Datum) { while retries < MAX_TRANSFORMER_RETRIES { runtime.block_on(async { // Get Generation - generation_json_bk_identification = match ollama_client::send_request(table_details_json_str.as_str(), ollama_client::PromptTemplate::BKIdentification, &0, &hints).await { - Ok(mut response_json) => { + generation_json_bk_identification = match openai_client::send_request(table_details_json_str.as_str(), openai_client::PromptTemplate::BKIdentification, &0, &hints).await { + Ok(response_json) => { // TODO: Add 
a function to enable logging. - // let response_json_pretty = serde_json::to_string_pretty(&response_json) - // .expect("Failed to convert Response JSON to Pretty String."); + let response_json_pretty = serde_json::to_string_pretty(&response_json) + .expect("Failed to convert Response JSON to Pretty String."); + log!("Response: {}", response_json_pretty); Some(response_json) }, Err(e) => { - log!("Error in Ollama client request: {}", e); + log!("Error in transformer request, malformed or timed out: {}", e); hints = format!("Hint: Please ensure you provide a JSON response only. This is your {} attempt.", retries + 1); None } }; }); - // let identified_business_key: IdentifiedBusinessKey = serde_json::from_value(generation_json_bk_identification.unwrap()).expect("Not valid JSON"); + + if generation_json_bk_identification.is_none() { + retries += 1; + continue; // Skip to the next iteration + } match serde_json::from_value::(generation_json_bk_identification.clone().unwrap()) { Ok(bk) => { @@ -114,21 +118,26 @@ pub extern "C" fn background_worker_transformer_client(_arg: pg_sys::Datum) { while retries < MAX_TRANSFORMER_RETRIES { runtime.block_on(async { // Get Generation - generation_json_bk_name = match ollama_client::send_request(table_details_json_str.as_str(), ollama_client::PromptTemplate::BKName, &0, &hints).await { - Ok(mut response_json) => { + generation_json_bk_name = match openai_client::send_request(table_details_json_str.as_str(), openai_client::PromptTemplate::BKName, &0, &hints).await { + Ok(response_json) => { // let response_json_pretty = serde_json::to_string_pretty(&response_json) // .expect("Failed to convert Response JSON to Pretty String."); Some(response_json) }, Err(e) => { - log!("Error in Ollama client request: {}", e); + log!("Error in transformer request, malformed or timed out: {}", e); hints = format!("Hint: Please ensure you provide a JSON response only. 
This is your {} attempt.", retries + 1); None } }; }); + if generation_json_bk_name.is_none() { + retries += 1; + continue; // Skip to the next iteration + } + match serde_json::from_value::(generation_json_bk_name.clone().unwrap()) { Ok(bk) => { business_key_name_opt = Some(bk); @@ -158,12 +167,12 @@ pub extern "C" fn background_worker_transformer_client(_arg: pg_sys::Datum) { runtime.block_on(async { // Get Generation generation_json_descriptor_sensitive = - match ollama_client::send_request( + match openai_client::send_request( table_details_json_str.as_str(), - ollama_client::PromptTemplate::DescriptorSensitive, + openai_client::PromptTemplate::DescriptorSensitive, column, &hints).await { - Ok(mut response_json) => { + Ok(response_json) => { // let response_json_pretty = serde_json::to_string_pretty(&response_json) // .expect("Failed to convert Response JSON to Pretty String."); @@ -171,7 +180,7 @@ pub extern "C" fn background_worker_transformer_client(_arg: pg_sys::Datum) { Some(response_json) }, Err(e) => { - log!("Error in Ollama client request: {}", e); + log!("Error in transformer request, malformed or timed out: {}", e); hints = format!("Hint: Please ensure you provide a JSON response only. 
This is your {} attempt.", retries + 1); None } @@ -179,6 +188,11 @@ pub extern "C" fn background_worker_transformer_client(_arg: pg_sys::Datum) { // generation_json_descriptors_sensitive.insert(column, generation_json_descriptor_sensitive); }); + if generation_json_descriptor_sensitive.is_none() { + retries += 1; + continue; // Skip to the next iteration + } + match serde_json::from_value::(generation_json_descriptor_sensitive.clone().unwrap()) { Ok(des) => { // business_key_name_opt = Some(des); diff --git a/extension/src/utility/guc.rs b/extension/src/utility/guc.rs index 3da772d..2bd3838 100644 --- a/extension/src/utility/guc.rs +++ b/extension/src/utility/guc.rs @@ -17,11 +17,16 @@ pub static PG_AUTO_DW_TRANSFORMER_SERVER_URL: GucSetting> = GucSet CStr::from_bytes_with_nul_unchecked(b"http://localhost:11434/api/generate\0") })); +// Default not set +pub static PG_AUTO_DW_TRANSFORMER_SERVER_TOKEN: GucSetting> = GucSetting::>::new(None); + // Default model is "mistral" pub static PG_AUTO_DW_MODEL: GucSetting> = GucSetting::>::new(Some(unsafe { CStr::from_bytes_with_nul_unchecked(b"mistral\0") })); + + // Default confidence level value is 0.8 // pub static PG_AUTO_DW_CONFIDENCE_LEVEL: GucSetting = GucSetting::::new(0.8); @@ -55,6 +60,15 @@ pub fn init_guc() { GucFlags::default(), ); + GucRegistry::define_string_guc( + "pg_auto_dw.transformer_server_token", + "Bearer token for authenticating API calls to the Transformer Server for the pg_auto_dw extension.", + "The Bearer token is required for authenticating API calls to the Transformer Server when interacting with the pg_auto_dw extension.", + &PG_AUTO_DW_TRANSFORMER_SERVER_TOKEN, + GucContext::Suset, + GucFlags::default(), + ); + GucRegistry::define_string_guc( "pg_auto_dw.model", "Transformer model for the pg_auto_dw extension.", @@ -83,6 +97,7 @@ pub enum PgAutoDWGuc { DatabaseName, DwSchema, TransformerServerUrl, + TransformerServerToken, Model, // ConfidenceLevel, } @@ -94,6 +109,7 @@ pub fn get_guc(guc: 
PgAutoDWGuc) -> Option { PgAutoDWGuc::DatabaseName => PG_AUTO_DW_DATABASE_NAME.get(), PgAutoDWGuc::DwSchema => PG_AUTO_DW_DW_SCHEMA.get(), PgAutoDWGuc::TransformerServerUrl => PG_AUTO_DW_TRANSFORMER_SERVER_URL.get(), + PgAutoDWGuc::TransformerServerToken => PG_AUTO_DW_TRANSFORMER_SERVER_TOKEN.get(), PgAutoDWGuc::Model => PG_AUTO_DW_MODEL.get(), // PgAutoDWGuc::ConfidenceLevel => return Some(PG_AUTO_DW_CONFIDENCE_LEVEL.get().to_string()), }; diff --git a/extension/src/utility/mod.rs b/extension/src/utility/mod.rs index 1c1c889..c089672 100644 --- a/extension/src/utility/mod.rs +++ b/extension/src/utility/mod.rs @@ -1,3 +1,4 @@ pub mod ollama_client; +pub mod openai_client; pub mod setup; pub mod guc; \ No newline at end of file diff --git a/extension/src/utility/ollama_client.rs b/extension/src/utility/ollama_client.rs index 383a9be..e64d99a 100644 --- a/extension/src/utility/ollama_client.rs +++ b/extension/src/utility/ollama_client.rs @@ -4,6 +4,8 @@ use std::time::Duration; use crate::utility::guc; +use pgrx::prelude::*; + #[derive(Serialize, Debug)] pub struct GenerateRequest { pub model: String, @@ -40,6 +42,8 @@ pub async fn send_request(new_json: &str, template_type: PromptTemplate, col: &u .replace("{column_no}", &column_number) .replace("{hints}", &hints); + log!("Prompt: {prompt}"); + // GUC Values for the transformer server let transformer_server_url = guc::get_guc(guc::PgAutoDWGuc::TransformerServerUrl).ok_or("GUC: Transformer Server URL is not set")?; let model = guc::get_guc(guc::PgAutoDWGuc::Model).ok_or("MODEL GUC is not set.")?; diff --git a/extension/src/utility/openai_client.rs b/extension/src/utility/openai_client.rs new file mode 100644 index 0000000..1bf9608 --- /dev/null +++ b/extension/src/utility/openai_client.rs @@ -0,0 +1,446 @@ +use reqwest::ClientBuilder; +use serde::{Deserialize, Serialize}; +use std::time::Duration; + +use crate::utility::guc; +use pgrx::prelude::*; + +#[derive(Serialize, Debug)] +pub struct Request { + pub model: 
String, // Model name for OpenAI + pub messages: Vec, // List of messages for chat format + pub temperature: f64, // Temperature setting + pub response_format: ResponseFormat, // JSON-only response format field +} + +#[derive(Serialize, Deserialize, Debug)] +pub struct Message { + pub role: String, // "user", "assistant", or "system" + pub content: String, // The actual prompt or message content +} + +#[derive(Serialize, Debug)] +pub struct ResponseFormat { + #[serde(rename = "type")] + pub r#type: String, // To ensure JSON response format +} + +#[derive(Serialize, Deserialize, Debug)] +pub struct Response { + pub id: String, // Unique identifier for the chat session + pub object: String, // Object type, usually "chat.completion" + pub created: u64, // Timestamp when the response was created + pub model: String, // Model name used for the response + pub choices: Vec, // List of choices (contains the actual answer) + pub usage: Usage, // Information about token usage +} + +#[derive(Serialize, Deserialize, Debug)] +pub struct Choice { + pub message: Message, // Contains the assistant's message + pub finish_reason: Option, // Reason for stopping (e.g., "stop") + pub index: usize, // Index of the choice + pub logprobs: Option, // Log probabilities (if applicable) +} + + +#[derive(Serialize, Deserialize, Debug)] +pub struct Usage { + pub prompt_tokens: u32, // Number of tokens in the prompt + pub completion_tokens: u32, // Number of tokens in the completion + pub total_tokens: u32, // Total number of tokens used +} + +pub async fn send_request(new_json: &str, template_type: PromptTemplate, col: &u32, hints: &str) -> Result> { + + let client = ClientBuilder::new().timeout(Duration::from_secs(60)).build()?; // 30 sec default is too short for some LLMs.
+ + let prompt_template = template_type.template(); + // let prompt_template = PromptTemplate::Test.template(); + + // Inject new_json into the prompt_template' + let column_number = col.to_string(); + let prompt = prompt_template + .replace("{new_json}", new_json) + .replace("{column_no}", &column_number) + .replace("{hints}", &hints); + + // GUC Values for the transformer server + let transformer_server_url = guc::get_guc(guc::PgAutoDWGuc::TransformerServerUrl).ok_or("GUC: Transformer Server URL is not set.")?; + let transformer_server_token = guc::get_guc(guc::PgAutoDWGuc::TransformerServerToken).ok_or("GUC: Transformer Server Token is not set.")?; + + let model = guc::get_guc(guc::PgAutoDWGuc::Model).ok_or("MODEL GUC is not set.")?; + + let json_type = String::from("json_object"); + let response_format = ResponseFormat { r#type: json_type,}; + + let temperature: f64 = 0.75; + + let role = String::from("user"); + + let message = Message { + role, + content: prompt, + }; + + let messages = vec![message]; + + let request = Request { + model, + messages, + temperature, + response_format, + }; + + log!("Request URL: {}", transformer_server_url); + log!("Request Headers:"); + // log!(" Authorization: Bearer {}", transformer_server_token); + log!(" Content-Type: application/json"); + log!("Request Body: {}", serde_json::to_string(&request).unwrap()); + + let response = client + .post(&transformer_server_url) // Ensure this is updated to OpenAI's URL + .header("Authorization", format!("Bearer {}", transformer_server_token)) // Add Bearer token here + .header("Content-Type", "application/json") // Specify JSON content type + .json(&request) // Send the request body as JSON + .send() + .await? 
+ .json::() // Await the response and parse it as JSON + .await?; + + log!("Response: {}", serde_json::to_string(&response).unwrap()); + + // let response_json: serde_json::Value = serde_json::to_value(&response)?; + + // Extract the content string + let content_str = &response + .choices + .get(0) + .ok_or("No choices in response")? + .message + .content; + + // Parse the content string into serde_json::Value + let content_json: serde_json::Value = serde_json::from_str(content_str)?; + + Ok(content_json) +} + +#[derive(Debug)] +pub enum PromptTemplate { + BKIdentification, + BKName, + DescriptorSensitive, + Test, +} + +impl PromptTemplate { + fn template(&self) -> &str { + match self { + PromptTemplate::BKIdentification => r#" + Task Title: Business Key Identification in JSON Source Table Object + + You have a JSON Source Table Object that includes the schema name, table name, and detailed column information. Your responses to requested tasks will be used to help create downstream data vault tables. + + Requested Task: Identify the column number most likely to serve as the business key. Return only one column in JSON format as specified below. + + + Request Details: + If the column is a primary key, assume it is the business key. If not, choose the column most likely to uniquely identify the table’s entity. Additionally, provide a confidence value for your selection. + + Confidence Value: Provide a score between 0 and 1, rounded to two decimal places, representing your confidence in the selected column. A value of 0.80 or higher is considered reasonably confident. + + + Reason: Indicate why you made the decision you did. + + Output: Ensure the output conforms to the format shown in the examples below. + + Example Input 1) + JSON Source Table Object: + { + "Schema Name": "public", + "Table Name": "customer", + "Column Details": [ + "Column No: 1 Named: customer_id of type: uuid And is a primary key. 
Column Comments: NA", + "Column No: 2 Named: city of type: character varying(255) Column Comments: NA", + "Column No: 3 Named: state of type: character(2) Column Comments: NA", + "Column No: 4 Named: zip of type: character varying(10) Column Comments: NA" + ] + } + + Example Output 1) + { + "Identified Business Key": { + "Column No": 1, + "Confidence Value": 0.95, + "Reason": "The 'customer_id' column is designated as the primary key, which is typically the best candidate for a business key." + } + } + + Example Input 2) + JSON Source Table Object: + { + "Schema Name": "sales", + "Table Name": "order_details", + "Column Details": [ + "Column No: 1 Named: order_id of type: integer Column Comments: NA", + "Column No: 2 Named: product_id of type: integer Column Comments: NA", + "Column No: 3 Named: quantity of type: integer Column Comments: NA", + "Column No: 4 Named: order_date of type: date Column Comments: NA" + ] + } + + Example Output 2) + { + "Identified Business Key": { + "Column No": 1, + "Confidence Value": 0.75, + "Reason": "Although 'order_id' is not explicitly marked as a primary key, it is likely to uniquely identify each order, making it a strong candidate for the business key." + } + } + + Now, based on the instructions and examples above, please generate the JSON output for the following input. {hints} + + JSON Source Table Object: {new_json} + "#, + PromptTemplate::BKName => r#" + Task Title: Business Key Naming in JSON Source Table Object with specified Column + + You have a JSON Source Table Object that includes the schema name, table name, and detailed column information. Your responses to requested tasks will be used to help create downstream data vault tables. + + Requested Task: Identify the business key name. The business key part column has already been identified, and its associated column number, “column no”, will be provided along with the JSON Source Table Object. 
Return a name that best represents the business key from a data vault perspective. + + Request Details: + + The Business Key Name should be crafted based on the attribute linked to the business key, as identified by the provided column number. Prioritize the attribute name over the table name if the attribute name is descriptive enough. It should clearly represent the core business entity, avoiding generic terms like “ID,” “number,” or “Entity.” The name should focus solely on the business aspect, using terms like “customer,” “employee,” or “seller” that directly reflect the entity’s purpose, without unnecessary suffixes or identifiers. If the attribute associated with the business key or its column comments are not descriptive enough, the table name or schema name can be used to help formulate the Business Key Name. + + Confidence Value: Provide a score between 0 and 1, rounded to two decimal places, representing your confidence in your chosen Business Key Name. A value of 0.80 or higher is considered reasonably confident. + + + Reason: Indicate why you made the decision you did. + + Output: Ensure the output conforms to the format shown in the examples below. + + Example Input 1) + JSON Source Table Object: + { + "Schema Name": "public", + "Table Name": "customer", + "Column Details": [ + "Column No: 1 Named: customer_id of type: uuid And is a primary key. Column Comments: NA", + "Column No: 2 Named: city of type: character varying(255) Column Comments: NA", + "Column No: 3 Named: state of type: character(2) Column Comments: NA", + "Column No: 4 Named: zip of type: character varying(10) Column Comments: NA" + ] + } + + Column No: 1 + + Example Output 1) + { + "Business Key Name": { + "Name": "Customer", + "Confidence Value": 0.9, + "Reason": "The column 'customer_id' is a primary key and represents the unique identifier for customers in the 'customer' table. 
Given that the table name 'customer' directly reflects the business entity, 'Customer' is chosen as the Business Key Name. The confidence value is high because the identifier is straightforward and strongly aligned with the core business entity." + } + } + + Example Input 2) + JSON Source Table Object: + { + "Schema Name": "sales", + "Table Name": "order_details", + "Column Details": [ + "Column No: 1 Named: id of type: integer Column Comments: NA", + "Column No: 2 Named: product_id of type: integer Column Comments: NA", + "Column No: 3 Named: quantity of type: integer Column Comments: NA", + "Column No: 4 Named: order_date of type: date Column Comments: NA" + ] + } + + Column No: 1 + + Example Output 2) + { + "Business Key Name": { + "Name": "Order", + "Confidence Value": 0.85, + "Reason": "The column 'id' is a primary key and serves as the unique identifier for records in the 'order_details' table. Although the column name 'id' is generic, the table name 'order_details' indicates that the records pertain to individual orders. Therefore, 'Order' is chosen as the Business Key Name to best represent the core business entity. The confidence value is slightly lower due to the generic nature of the column name, but it is still reasonably confident given the context provided by the table name." + } + } + + Now, based on the instructions and examples above, please generate the JSON output for the following input. {hints} + + JSON Source Table Object: {new_json} + + Column No: {column_no} + "#, + PromptTemplate::DescriptorSensitive => r#" + Task Title: Identification of PII in JSON Source Table Object + + You have a JSON Source Table Object that includes the schema name, table name, and detailed column information. Your task is to assist in the creation of downstream data vault tables by performing the requested tasks based on this information. + + Requested Task: Identify if the descriptor is a descriptor sensitive PII subtype. 
A descriptor column, along with its associated column number (“column no”), will be provided in the JSON Source Table Object. If you determine that the column contains Personally Identifiable Information (PII), categorize it as “Descriptor - Sensitive.” + + Request Details: + PII Identification: Only consider a column as PII if it directly matches an item from the PII list provided below. Do not infer or project beyond this list. If a column name or its associated comment closely resembles an item from the list, classify it as PII. + No Overgeneralization: Avoid overgeneralization or inference beyond what is explicitly stated in the list. Focus strictly on the provided PII list. + + Personal Identifiable Information (PII) List: + + Consider any of the following types of information as PII and categorize the corresponding column as “Descriptor - Sensitive”: + + - Person’s Name: PII (Includes first name, last name, or both). + - Social Security Number (SSN): PII + - Driver’s License Number: PII + - Passport Number: PII + - Email Address: PII + - Physical Street Address: PII (Includes street address, but excludes City, State, or standard 5-digit Zip code). + - Extended Zip Code: PII (Any Zip code with more than 5 digits). + - Telephone Number: PII (Includes both landline and mobile numbers). + - Date of Birth: PII + - Place of Birth: PII + - Biometric Data: PII (Includes fingerprints, facial recognition data, iris scans). + - Medical Information: PII (Includes health records, prescriptions). + - Financial Information: PII (Includes bank account numbers, credit card numbers, debit card numbers). + - Employment Information: PII (Includes employment records, salary information). + - Insurance Information: PII (Includes policy numbers, claim information). + - Education Records: PII (Includes student records, transcripts). + - Online Identifiers: PII (Includes usernames, IP addresses, cookies, MAC addresses). 
+ - Photographs or Videos: PII (Any media that can identify an individual). + - National Identification Numbers: PII (Includes identifiers outside of SSN, such as National Insurance Numbers in the UK). + - Geolocation Data: PII (Includes GPS coordinates, location history). + - Vehicle Registration Numbers: PII + + Not PII: + + Some data may seem personally identifiable; however, it is not specific enough to identify an individual. + + - Standard 5-Digit Zip Code: Not PII + - City: Not PII + - State: Not PII + - Country: Not PII + - Age (in years): Not PII (Unless combined with other identifiers like date of birth). + - Date or Timestamp (Example: created_date, created_timestamp, update_Date, update_timestamp): Not PII (Unless combined with other identifiers like date of birth) + - Gender: Not PII + - Ethnicity/Race: Not PII (General categories, e.g., “Caucasian,” “Asian,” without additional identifiers). + - Publicly Available Information: Not PII (Any information that is lawfully made available from federal, state, or local government records). + - Generic Job Titles: Not PII (Titles like “Manager,” “Engineer,” without additional identifying details). + - Company/Organization Name: Not PII (Names of companies or organizations without personal identifiers). + + Confidence Value: Provide a score between 0 and 1, rounded to two decimal places, representing your confidence in your “Is PII” determination of true or false. A value of 0.80 or higher is considered reasonably confident in your true or false answer. + + + Reason: Indicate why you made the decision you did. + + Output: Please ensure that your output is JSON and matches the structure of the output examples provided. + + Example Input 1) + JSON Source Table Object: + { + "Schema Name": "public", + "Table Name": "customer", + "Column Details": [ + "Column No: 1 Named: customer_id of type: uuid And is a primary key. 
Column Comments: NA", + "Column No: 2 Named: city of type: character varying(255) Column Comments: NA", + "Column No: 3 Named: state of type: character(2) Column Comments: NA", + "Column No: 4 Named: zip of type: character varying(10) Column Comments: NA" + ] + } + + Column No: 4 + + Example Output 1) + { + "Descriptor - Sensitive": { + "Is PII": true, + "Confidence Value": 0.85, + "Reason": "The 'zip' column is identified as PII because its data type, character varying(10), allows for the possibility of storing extended zip codes, which matches an item on the provided PII list." + } + } + + Example Input 2) + JSON Source Table Object: + { + "Schema Name": "public", + "Table Name": "customer", + "Column Details": [ + "Column No: 1 Named: customer_id of type: uuid And is a primary key. Column Comments: NA", + "Column No: 2 Named: city of type: character varying(255) Column Comments: NA", + "Column No: 3 Named: state of type: character(2) Column Comments: NA", + "Column No: 4 Named: zip of type: character varying(10) Column Comments: NA" + ] + } + + Column No: 2 + + Example Output 2) + { + "Descriptor - Sensitive": { + "Is PII": false, + "Confidence Value": 0.90, + "Reason": "The 'city' column is not considered PII because city names do not match any item on the provided PII list." + } + } + + Example Input 3) + JSON Source Table Object: + { + "Schema Name": "public", + "Table Name": "employee", + "Column Details": [ + "Column No: 1 Named: employee_id of type: uuid And is a primary key. 
Column Comments: NA", + "Column No: 2 Named: full_name of type: character varying(255) Column Comments: NA", + "Column No: 3 Named: email of type: character varying(255) Column Comments: NA", + "Column No: 4 Named: salary of type: numeric Column Comments: NA" + ] + } + + Column No: 2 + + Example Output 3) + { + "Descriptor - Sensitive": { + "Is PII": true, + "Confidence Value": 0.95, + "Reason": "The 'full_name' column is identified as PII because it matches the 'Person's Name' item from the provided PII list." + } + } + + Example Input 4) + JSON Source Table Object: + { + "Schema Name": "public", + "Table Name": "order", + "Column Details": [ + "Column No: 1 Named: order_id of type: uuid And is a primary key. Column Comments: NA", + "Column No: 2 Named: order_date of type: date Column Comments: NA", + "Column No: 3 Named: customer_email of type: character varying(255) Column Comments: 'Email address of the customer who placed the order'", + "Column No: 4 Named: total_amount of type: numeric Column Comments: NA" + ] + } + + Column No: 3 + + Example Output 4) + { + "Descriptor - Sensitive": { + "Is PII": true, + "Confidence Value": 0.98, + "Reason": "The 'customer_email' column is identified as PII because it matches the 'Email Address' item from the provided PII list." + } + } + + Now, based on the instructions and examples above, please generate the appropriate JSON output only for the following JSON Source Table Object and Column No inputs. {hints} + + JSON Source Table Object: {new_json} + + Column No: {column_no} + + "#, + PromptTemplate::Test => r#"Why is the sky blue? Only respond in PROPER JSON FORMAT."#, + } + } +} + +