forked from langchain-ai/langchainjs
-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Merge pull request #1 from Ally-Financial/feature/masking
Feature/masking
- Loading branch information
Showing
13 changed files
with
998 additions
and
1 deletion.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,5 @@ | ||
--- | ||
sidebar_position: 6 | ||
--- | ||
|
||
# Experimental |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,64 @@ | ||
# Masking | ||
|
||
The experimental masking parser and transformer is an extendable module for masking and rehydrating strings. One of the primary use cases for this module is to redact PII (Personally Identifiable Information) from a string before making a call to an LLM. | ||
|
||
### Real world scenario | ||
|
||
A customer support system receives messages containing sensitive customer information. The system must parse these messages, mask any PII (like names, email addresses, and phone numbers), and log them for analysis while complying with privacy regulations. Before logging the transcript, a summary is generated using an LLM. | ||
|
||
## Example | ||
|
||
Use the RegexMaskingTransformer to create a simple mask for email and phone. | ||
|
||
```typescript | ||
import { | ||
MaskingParser, | ||
RegexMaskingTransformer, | ||
} from "langchain/experimental/masking"; | ||
|
||
// Define masking strategy: every match is replaced by a labelled placeholder
// with a random hexadecimal suffix.
const emailMask = () => `[email-${Math.random().toString(16).slice(2)}]`;
const phoneMask = () => `[phone-${Math.random().toString(16).slice(2)}]`;

// Configure pii transformer
const piiMaskingTransformer = new RegexMaskingTransformer({
  email: { regex: /\S+@\S+\.\S+/g, mask: emailMask },
  phone: { regex: /\d{3}-\d{3}-\d{4}/g, mask: phoneMask },
});

const maskingParser = new MaskingParser();
maskingParser.addTransformer(piiMaskingTransformer);

const input =
  "Contact me at jane.doe@example.com or 555-123-4567. Also reach me at john.smith@example.com";
const masked = await maskingParser.parse(input);

console.log(masked);
// Contact me at [email-a31e486e324f6] or [phone-da8fc1584f224]. Also reach me at [email-d5b6237633d95]

// rehydrate() is async: await it to get the string rather than a Promise.
const rehydrated = await maskingParser.rehydrate(masked);
console.log(rehydrated);
// Contact me at jane.doe@example.com or 555-123-4567. Also reach me at john.smith@example.com
``` | ||
|
||
:::note | ||
If you plan on storing the masking state to rehydrate the original values asynchronously, ensure you are following best security practices. In most cases, you will want to define a custom hashing and salting strategy. | ||
::: | ||
|
||
## Get started | ||
|
||
import CodeBlock from "@theme/CodeBlock"; | ||
import ExampleKitchenSink from "@examples/experimental/masking/kitchen_sink.ts"; | ||
import ExampleNext from "@examples/experimental/masking/next.ts"; | ||
import ExampleStream from "@examples/chains/llm_chain_stream.ts"; | ||
import ExampleCancellation from "@examples/chains/llm_chain_cancellation.ts"; | ||
|
||
### Next.js stream | ||
|
||
Example Next.js chat endpoint leveraging the RegexMaskingTransformer. The current chat message and chat message history are masked every time the API is called with a chat payload. | ||
|
||
<CodeBlock language="typescript">{ExampleNext}</CodeBlock> | ||
|
||
### Kitchen sink | ||
|
||
<CodeBlock language="typescript">{ExampleKitchenSink}</CodeBlock> |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,80 @@ | ||
import { | ||
MaskingParser, | ||
RegexMaskingTransformer, | ||
} from "langchain/experimental/masking"; | ||
|
||
/**
 * A simple, non-cryptographic string hash for demonstration purposes.
 *
 * Folds every UTF-16 code unit of `input` into a 32-bit signed integer
 * (`hash * 31 + charCode`) and renders it in base 16, so the result may start
 * with "-". This is not a secure hash; use a real hashing and salting strategy
 * for production data.
 *
 * @param input - The matched text to hash.
 * @returns The hexadecimal representation of the 32-bit hash.
 */
function simpleHash(input: string): string {
  let hash = 0;
  for (let i = 0; i < input.length; i++) {
    const char = input.charCodeAt(i);
    hash = (hash << 5) - hash + char;
    hash |= 0; // Convert to 32bit integer
  }
  return hash.toString(16);
}

// Builds a mask function producing `[<label>-<hash of match>]`.
const maskWith = (label: string) => (match: string) =>
  `[${label}-${simpleHash(match)}]`;

const emailMask = maskWith("email");
const phoneMask = maskWith("phone");
const nameMask = maskWith("name");
const ssnMask = maskWith("ssn");
const creditCardMask = maskWith("creditcard");
const passportMask = maskWith("passport");
const licenseMask = maskWith("license");
const addressMask = maskWith("address");
const dobMask = maskWith("dob");
const bankAccountMask = maskWith("bankaccount");

// Regular expressions for different types of PII
const patterns = {
  email: { regex: /\S+@\S+\.\S+/g, mask: emailMask },
  phone: { regex: /\b\d{3}-\d{3}-\d{4}\b/g, mask: phoneMask },
  name: { regex: /\b[A-Z][a-z]+ [A-Z][a-z]+\b/g, mask: nameMask },
  ssn: { regex: /\b\d{3}-\d{2}-\d{4}\b/g, mask: ssnMask },
  creditCard: { regex: /\b(?:\d{4}[ -]?){3}\d{4}\b/g, mask: creditCardMask },
  // JavaScript has no inline `(?i)` modifier; case-insensitivity is the `i` flag.
  passport: { regex: /\b[A-Z]{1,2}\d{6,9}\b/gi, mask: passportMask },
  license: { regex: /\b[A-Z]{1,2}\d{6,8}\b/gi, mask: licenseMask },
  address: {
    // `*` is a quantifier here (zero or more extra capitalized words),
    // not an escaped literal asterisk.
    regex: /\b\d{1,5}\s[A-Z][a-z]+(?:\s[A-Z][a-z]+)*\b/g,
    mask: addressMask,
  },
  dob: { regex: /\b\d{4}-\d{2}-\d{2}\b/g, mask: dobMask },
  bankAccount: { regex: /\b\d{8,17}\b/g, mask: bankAccountMask },
};
|
||
// Create a RegexMaskingTransformer with multiple patterns
const piiMaskingTransformer = new RegexMaskingTransformer(patterns);

// Hooks for different stages of masking and rehydrating; each one just logs
// the message it is handed.
const onMaskingStart = (message: string) =>
  console.log(`Starting to mask message: ${message}`);
const onMaskingEnd = (maskedMessage: string) =>
  console.log(`Masked message: ${maskedMessage}`);
const onRehydratingStart = (message: string) =>
  console.log(`Starting to rehydrate message: ${message}`);
const onRehydratingEnd = (rehydratedMessage: string) =>
  console.log(`Rehydrated message: ${rehydratedMessage}`);

// Initialize MaskingParser with the transformer and hooks
const maskingParser = new MaskingParser({
  transformers: [piiMaskingTransformer],
  onMaskingStart,
  onMaskingEnd,
  onRehydratingStart,
  onRehydratingEnd,
});

// Example message containing multiple types of PII
const message =
  "Contact Jane Doe at jane.doe@example.com or 555-123-4567. Her SSN is 123-45-6789 and her credit card number is 1234-5678-9012-3456. Passport number: AB1234567, Driver's License: X1234567, Address: 123 Main St, Date of Birth: 1990-01-01, Bank Account: 12345678901234567.";

// Mask and rehydrate the message
maskingParser
  .parse(message)
  .then((maskedMessage: string) => {
    console.log(`Masked message: ${maskedMessage}`);
    return maskingParser.rehydrate(maskedMessage);
  })
  .then((rehydratedMessage: string) => {
    console.log(`Final rehydrated message: ${rehydratedMessage}`);
  })
  .catch((error: unknown) => {
    // Surface failures instead of leaving an unhandled promise rejection.
    console.error("Masking example failed:", error);
  });
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,73 @@ | ||
// app/api/chat | ||
|
||
import { | ||
MaskingParser, | ||
RegexMaskingTransformer, | ||
} from "langchain/experimental/masking"; | ||
import { PromptTemplate } from "langchain/prompts"; | ||
import { ChatOpenAI } from "langchain/chat_models/openai"; | ||
import { BytesOutputParser } from "langchain/schema/output_parser"; | ||
|
||
export const runtime = "edge"; | ||
|
||
/**
 * Formats a chat message as `<role>: <content>` so every history entry has
 * the same shape.
 *
 * @param message - Chat message carrying a role and its text content.
 * @returns The message rendered as a single `role: content` string.
 */
const formatMessage = (message: { role: string; content: string }): string => {
  const { role, content } = message;
  return `${role}: ${content}`;
};
|
||
// Prompt template with two placeholders: {chat_history} and {input}.
const CUSTOMER_SUPPORT = `You are a customer support summarizer agent. Always include masked PII in your response.
Current conversation:
{chat_history}
User: {input}
AI:`;

// Configure Masking Parser
const maskingParser = new MaskingParser();
// Define transformations for masking emails and phone numbers using regular expressions
const piiMaskingTransformer = new RegexMaskingTransformer({
  email: { regex: /\S+@\S+\.\S+/g }, // If a regex is provided without a mask we fallback to a simple default hashing function
  phone: { regex: /\d{3}-\d{3}-\d{4}/g },
});

maskingParser.addTransformer(piiMaskingTransformer);
|
||
/**
 * Chat endpoint: masks the latest message and the prior history, sends the
 * masked text through the prompt -> model -> bytes parser chain, and streams
 * the model output back to the caller.
 *
 * @param req - Incoming request whose JSON body carries a `messages` array of
 *   `{ role, content }` objects; the last entry is the current user message.
 * @returns A streaming text response, a 400 JSON error when `messages` is
 *   missing or empty, or a 500 JSON error when anything else fails.
 */
export async function POST(req: Request) {
  try {
    const body = await req.json();
    const messages = body.messages ?? [];
    // Without at least one message there is nothing to mask or answer.
    if (!Array.isArray(messages) || messages.length === 0) {
      return Response.json(
        { error: "Request body must include a non-empty 'messages' array." },
        { status: 400 }
      );
    }

    const formattedPreviousMessages = messages.slice(0, -1).map(formatMessage);
    const currentMessageContent = messages[messages.length - 1].content; // Extract the content of the last message
    // Mask sensitive information in the current message
    const guardedMessageContent = await maskingParser.parse(
      currentMessageContent
    );
    // Mask sensitive information in the chat history
    const guardedHistory = await maskingParser.parse(
      formattedPreviousMessages.join("\n")
    );

    const prompt = PromptTemplate.fromTemplate(CUSTOMER_SUPPORT);
    const model = new ChatOpenAI({ temperature: 0.8 });
    // Initialize an output parser that handles serialization and byte-encoding for streaming
    const outputParser = new BytesOutputParser();
    const chain = prompt.pipe(model).pipe(outputParser); // Chain the prompt, model, and output parser together

    console.log("[GUARDED INPUT]", guardedMessageContent); // Contact me at -1157967895 or -1626926859.
    console.log("[GUARDED HISTORY]", guardedHistory); // user: Contact me at -1157967895 or -1626926859. assistant: Thank you for providing your contact information.
    // NOTE(review): the state maps masks back to the raw values; this log is
    // for demonstration only and should not be kept in production code.
    console.log("[STATE]", maskingParser.getState()); // { '-1157967895' => 'jane.doe@example.com', '-1626926859' => '555-123-4567'}

    // Stream the AI response based on the masked chat history and current message
    const stream = await chain.stream({
      chat_history: guardedHistory,
      input: guardedMessageContent,
    });

    // Alternative using the Vercel AI SDK:
    // npm i ai
    // import { StreamingTextResponse } from "ai";
    // return new StreamingTextResponse(stream);

    // A route handler must return a Response; hand the byte stream to it.
    return new Response(stream, {
      headers: { "content-type": "text/plain; charset=utf-8" },
    });
  } catch (e: unknown) {
    const errorMessage = e instanceof Error ? e.message : String(e);
    return Response.json({ error: errorMessage }, { status: 500 });
  }
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,7 @@ | ||
export { MaskingParser } from "./parser.js"; | ||
export { RegexMaskingTransformer } from "./regex_masking_transformer.js"; | ||
export { | ||
type MaskingParserConfig, | ||
type HashFunction, | ||
type HookFunction, | ||
} from "./types.js"; |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,139 @@ | ||
import { MaskingTransformer } from "./transformer.js"; | ||
import { MaskingParserConfig } from "./types.js"; | ||
|
||
/**
 * MaskingParser class for handling the masking and rehydrating of messages.
 *
 * It holds an ordered list of transformers and a state map that the
 * transformers fill while masking; that map is what `rehydrate` uses by
 * default to restore the original text.
 */
export class MaskingParser {
  private transformers: MaskingTransformer[];
  private state: Map<string, string>;
  private config: MaskingParserConfig;

  /**
   * @param config - Optional configuration: initial transformers and the
   *   masking/rehydrating hooks.
   */
  constructor(config: MaskingParserConfig = {}) {
    // Copy the list so addTransformer never mutates the caller's array.
    this.transformers = [...(config.transformers ?? [])];
    this.state = new Map<string, string>();
    this.config = config;
  }

  /**
   * Adds a transformer to the parser.
   * @param transformer - An instance of a class extending MaskingTransformer.
   */
  addTransformer(transformer: MaskingTransformer) {
    this.transformers.push(transformer);
  }

  /**
   * Getter method for retrieving the current state.
   * @returns The current state map (the parser's own instance, not a copy).
   */
  public getState(): Map<string, string> {
    return this.state;
  }

  /**
   * Masks the provided message using the added transformers.
   * Each transformer is applied in insertion order to the output of the
   * previous one, and the state map returned by each transformer replaces the
   * parser's state.
   *
   * Arguments are validated before `onMaskingStart` is invoked, so the hook
   * only ever sees a message that will actually be masked.
   *
   * @param message - The message to be masked.
   * @returns A masked version of the message.
   * @throws {TypeError} If the message is not a string.
   * @throws {Error} If no transformers are added.
   */
  async parse(message: string): Promise<string> {
    this.assertHasTransformers("parse", "parsing");
    this.assertIsString("parse", message);

    this.config.onMaskingStart?.(message);

    // Starts as the original message and is rewritten by each transformer.
    let processedMessage = message;
    for (const transformer of this.transformers) {
      // transform returns a tuple of the updated message and the updated state.
      [processedMessage, this.state] = transformer.transform(
        processedMessage,
        this.state
      );
    }

    this.config.onMaskingEnd?.(processedMessage);
    return processedMessage;
  }

  /**
   * Rehydrates a masked message back to its original form.
   * The transformers' rehydrate methods are applied in reverse insertion
   * order, each receiving the output of the previous one.
   *
   * Arguments are validated before `onRehydratingStart` is invoked.
   *
   * @param message - The masked message to be rehydrated.
   * @param state - Optional state map to rehydrate from; when omitted, the
   *   parser's internal state is used.
   * @returns The original (rehydrated) version of the message.
   * @throws {TypeError} If the message is not a string, or if `state` is
   *   provided and is not a Map.
   * @throws {Error} If no transformers are added.
   */
  async rehydrate(
    message: string,
    state?: Map<string, string>
  ): Promise<string> {
    this.assertIsString("rehydrate", message);
    this.assertHasTransformers("rehydrate", "rehydrating");

    // Compare against null/undefined rather than truthiness so that falsy
    // non-Map values (0, "", false) are rejected instead of silently ignored.
    if (state != null && !(state instanceof Map)) {
      throw new TypeError(
        "MaskingParser.rehydrate Error: The 'state' argument, if provided, must be an instance of Map."
      );
    }

    this.config.onRehydratingStart?.(message);

    const rehydrationState = state ?? this.state; // Use provided state or fallback to internal state
    let rehydratedMessage = message;
    // Iterate over a reversed copy so the transformer list itself is untouched.
    for (const transformer of [...this.transformers].reverse()) {
      rehydratedMessage = transformer.rehydrate(
        rehydratedMessage,
        rehydrationState
      );
    }

    this.config.onRehydratingEnd?.(rehydratedMessage);
    return rehydratedMessage;
  }

  /** Throws a TypeError when `message` is not a string. */
  private assertIsString(
    method: "parse" | "rehydrate",
    message: unknown
  ): void {
    if (typeof message !== "string") {
      throw new TypeError(
        `MaskingParser.${method} Error: The 'message' argument must be a string.`
      );
    }
  }

  /** Throws an Error when no transformer has been registered. */
  private assertHasTransformers(
    method: "parse" | "rehydrate",
    action: "parsing" | "rehydrating"
  ): void {
    if (this.transformers.length === 0) {
      throw new Error(
        `MaskingParser.${method} Error: No transformers have been added. Please add at least one transformer before ${action}.`
      );
    }
  }
}
Oops, something went wrong.