From 5301fc40e4764e779353fd4945b17642321d305f Mon Sep 17 00:00:00 2001 From: Jordan Date: Tue, 21 Nov 2023 17:12:48 -0500 Subject: [PATCH 01/14] [Feature] Implementation of experimental masking parser/transformer --- langchain/scripts/create-entrypoints.js | 4 +- langchain/src/experimental/masking/index.ts | 7 + langchain/src/experimental/masking/parser.ts | 139 ++++++++ .../masking/pii_masking_transformer.ts | 160 +++++++++ .../masking/tests/mask-integration.test.ts | 73 ++++ .../masking/tests/masking.test.ts | 319 ++++++++++++++++++ .../src/experimental/masking/transformer.ts | 10 + langchain/src/experimental/masking/types.ts | 29 ++ 8 files changed, 740 insertions(+), 1 deletion(-) create mode 100644 langchain/src/experimental/masking/index.ts create mode 100644 langchain/src/experimental/masking/parser.ts create mode 100644 langchain/src/experimental/masking/pii_masking_transformer.ts create mode 100644 langchain/src/experimental/masking/tests/mask-integration.test.ts create mode 100644 langchain/src/experimental/masking/tests/masking.test.ts create mode 100644 langchain/src/experimental/masking/transformer.ts create mode 100644 langchain/src/experimental/masking/types.ts diff --git a/langchain/scripts/create-entrypoints.js b/langchain/scripts/create-entrypoints.js index 98200151910d..3095f2ffa599 100644 --- a/langchain/scripts/create-entrypoints.js +++ b/langchain/scripts/create-entrypoints.js @@ -17,7 +17,8 @@ const entrypoints = { "agents/toolkits/aws_sfn": "agents/toolkits/aws_sfn", "agents/toolkits/sql": "agents/toolkits/sql/index", "agents/format_scratchpad": "agents/format_scratchpad/openai_functions", - "agents/format_scratchpad/openai_tools": "agents/format_scratchpad/openai_tools", + "agents/format_scratchpad/openai_tools": + "agents/format_scratchpad/openai_tools", "agents/format_scratchpad/log": "agents/format_scratchpad/log", "agents/format_scratchpad/xml": "agents/format_scratchpad/xml", "agents/format_scratchpad/log_to_message": @@ -311,6 +312,7 @@ const entrypoints = { "experimental/hubs/makersuite/googlemakersuitehub", "experimental/chains/violation_of_expectations": "experimental/chains/violation_of_expectations/index", + "experimental/masking": "experimental/masking/index", // evaluation evaluation: "evaluation/index", // runnables diff --git a/langchain/src/experimental/masking/index.ts b/langchain/src/experimental/masking/index.ts new file mode 100644 index 000000000000..ebb4615ee182 --- /dev/null +++ b/langchain/src/experimental/masking/index.ts @@ -0,0 +1,7 @@ +export { MaskingParser } from "./parser.js"; +export { PIIMaskingTransformer } from "./pii_masking_transformer.js"; +export { + type MaskingParserConfig, + type HashFunction, + type HookFunction, +} from "./types.js"; diff --git a/langchain/src/experimental/masking/parser.ts b/langchain/src/experimental/masking/parser.ts new file mode 100644 index 000000000000..197685d67dd5 --- /dev/null +++ b/langchain/src/experimental/masking/parser.ts @@ -0,0 +1,139 @@ +import { MaskingTransformer } from "./transformer.js"; +import { MaskingParserConfig } from "./types.js"; + +/** + * MaskingParser class for handling the masking and rehydrating of messages. + */ +export class MaskingParser { + private transformers: MaskingTransformer[]; + private state: Map; + private config: MaskingParserConfig; + + constructor(config: MaskingParserConfig = {}) { + this.transformers = config.transformers || []; + this.state = new Map(); + this.config = config; + } + + /** + * Adds a transformer to the parser. + * @param transformer - An instance of a class extending MaskingTransformer. + */ + addTransformer(transformer: MaskingTransformer) { + this.transformers.push(transformer); + } + + /** + * Getter method for retrieving the current state. + * @returns The current state map. + */ + public getState(): Map { + return this.state; + } + + /** + * Masks the provided message using the added transformers. + * This method sequentially applies each transformer's masking logic to the message. + * It utilizes a state map to track original values corresponding to their masked versions. + * + * @param message - The message to be masked. + * @returns A masked version of the message. + * @throws {TypeError} If the message is not a string. + * @throws {Error} If no transformers are added. + */ + async parse(message: string): Promise { + this.config.onMaskingStart?.(message); + + // Check if there are any transformers added to the parser. If not, throw an error + // as masking requires at least one transformer to apply its logic. + if (this.transformers.length === 0) { + throw new Error( + "MaskingParser.parse Error: No transformers have been added. Please add at least one transformer before parsing." + ); + } + + if (typeof message !== "string") { + throw new TypeError( + "MaskingParser.parse Error: The 'message' argument must be a string." + ); + } + + // Initialize the variable to hold the progressively masked message. + // It starts as the original message and gets transformed by each transformer. + let processedMessage = message; + + // Iterate through each transformer added to the parser. + this.transformers.forEach((transformer) => { + // Apply the transformer's transform method to the current state of the message. + // The transform method returns a tuple containing the updated message and state. + // The state is a map that tracks the original values of masked content. + // This state is essential for the rehydration process to restore the original message. + [processedMessage, this.state] = transformer.transform( + processedMessage, + this.state + ); + }); + + this.config.onMaskingEnd?.(processedMessage); + // Return the fully masked message after all transformers have been applied. + return processedMessage; + } + + /** + * Rehydrates a masked message back to its original form. + * This method sequentially applies the rehydration logic of each added transformer in reverse order. + * It relies on the state map to correctly map the masked values back to their original values. + * + * The rehydration process is essential for restoring the original content of a message + * that has been transformed (masked) by the transformers. This process is the inverse of the masking process. + * + * @param message - The masked message to be rehydrated. + * @returns The original (rehydrated) version of the message. + */ + async rehydrate( + message: string, + state?: Map + ): Promise { + this.config.onRehydratingStart?.(message); + + if (typeof message !== "string") { + throw new TypeError( + "MaskingParser.rehydrate Error: The 'message' argument must be a string." + ); + } + // Check if any transformers have been added to the parser. + // If no transformers are present, throw an error as rehydration requires at least one transformer. + if (this.transformers.length === 0) { + throw new Error( + "MaskingParser.rehydrate Error: No transformers have been added. Please add at least one transformer before rehydrating." + ); + } + + if (state && !(state instanceof Map)) { + throw new TypeError( + "MaskingParser.rehydrate Error: The 'state' argument, if provided, must be an instance of Map." + ); + } + + const rehydrationState = state || this.state; // Use provided state or fallback to internal state + // Initialize the rehydratedMessage with the input masked message. + // This variable will undergo rehydration by each transformer in reverse order. + let rehydratedMessage = message; + this.transformers + .slice() + .reverse() + .forEach((transformer) => { + // Apply the transformer's rehydrate method to the current state of the message. + // The rehydrate method uses the stored state (this.state) to map masked values + // back to their original values, effectively undoing the masking transformation. + rehydratedMessage = transformer.rehydrate( + rehydratedMessage, + rehydrationState + ); + }); + + this.config.onRehydratingEnd?.(rehydratedMessage); + // Return the fully rehydrated message after all transformers have been applied. + return rehydratedMessage; + } +} diff --git a/langchain/src/experimental/masking/pii_masking_transformer.ts b/langchain/src/experimental/masking/pii_masking_transformer.ts new file mode 100644 index 000000000000..80d743138de9 --- /dev/null +++ b/langchain/src/experimental/masking/pii_masking_transformer.ts @@ -0,0 +1,160 @@ +import { MaskingTransformer } from "./transformer.js"; +import { HashFunction, MaskingPattern } from "./types.js"; +/** + * PIIMaskingTransformer class for masking and rehydrating messages with PII. + */ +export class PIIMaskingTransformer extends MaskingTransformer { + private patterns: { [key: string]: MaskingPattern }; + private hashFunction: HashFunction; + + /** + * Constructs a PIIMaskingTransformer with given patterns and an optional hash function. + * Validates the provided patterns to ensure they conform to the expected structure. + * + * @param patterns - An object containing masking patterns. Each pattern should include + * a regular expression (`regex`) and optionally a `replacement` string + * or a `mask` function. + * @param hashFunction - An optional custom hash function to be used for masking. + */ + constructor( + patterns: { [key: string]: MaskingPattern }, + hashFunction?: HashFunction + ) { + super(); + // Validates the provided masking patterns before initializing the transformer. + // This ensures that each pattern has a valid regular expression. + this.validatePatterns(patterns); + + // Assigns the validated patterns and the hash function to the transformer. + // If no custom hash function is provided, the default hash function is used. + this.patterns = patterns; + this.hashFunction = hashFunction || this.defaultHashFunction; + } + + /** + * Validates the given masking patterns to ensure each pattern has a valid regular expression. + * Throws an error if any pattern is found to be invalid. + * + * @param patterns - The patterns object to validate. + */ + private validatePatterns(patterns: { [key: string]: MaskingPattern }) { + for (const key in patterns) { + const pattern = patterns[key]; + // Checks that each pattern is an object and has a regex property that is an instance of RegExp. + // Throws an error if these conditions are not met, indicating an invalid pattern configuration. + if ( + !pattern || + typeof pattern !== "object" || + !(pattern.regex instanceof RegExp) + ) { + throw new Error("Invalid pattern configuration."); + } + } + } + + /** + * Masks content in a message based on the defined patterns. + * @param message - The message to be masked. + * @param state - The current state containing original values. + * @returns A tuple of the masked message and the updated state. + */ + transform( + message: string, + state: Map + ): [string, Map] { + if (typeof message !== "string") { + throw new TypeError( + "PIIMaskingTransformer.transform Error: The 'message' argument must be a string." + ); + } + + if (!(state instanceof Map)) { + throw new TypeError( + "PIIMaskingTransformer.transform Error: The 'state' argument must be an instance of Map." + ); + } + + // Holds the progressively masked message + let processedMessage = message; + + // Initialize original values map with the current state or a new map + let originalValues = state || new Map(); + + // Iterate over each pattern defined in the transformer + for (const key in this.patterns) { + const pattern = this.patterns[key]; + + // Apply the current pattern's regex to the message + processedMessage = processedMessage.replace(pattern.regex, (match) => { + // Determine the masked value: use the mask function if provided, else use the replacement string, + // else use the hash function. + const maskedValue = pattern.mask + ? pattern.mask(match) + : pattern.replacement ?? this.hashFunction(match); + + // Store the mapping of the masked value to the original value (match) + originalValues.set(maskedValue, match); + + // Return the masked value to replace the original value in the message + return maskedValue; + }); + } + + // Return the fully masked message and the state map with all original values + return [processedMessage, originalValues]; + } + + /** + * Rehydrates a masked message back to its original form using the provided state. + * @param message - The masked message to be rehydrated. + * @param state - The state map containing mappings of masked values to their original values. + * @returns The rehydrated (original) message. + */ + rehydrate(message: string, state: Map): string { + if (typeof message !== "string") { + throw new TypeError( + "PIIMaskingTransformer.rehydrate Error: The 'message' argument must be a string." + ); + } + + if (!(state instanceof Map)) { + throw new TypeError( + "PIIMaskingTransformer.rehydrate Error: The 'state' argument must be an instance of Map." + ); + } + + // Convert the state map to an array and use reduce to sequentially replace masked values with original values. + return Array.from(state).reduce((msg, [masked, original]) => { + // Escape special characters in the masked string to ensure it can be used in a regular expression safely. + // This is necessary because masked values might contain characters that have special meanings in regex. + const escapedMasked = masked.replace(/[.*+?^${}()|[\]\\]/g, "\\$&"); + + // Replace all instances of the escaped masked value in the message with the original value. + // The 'g' flag in the RegExp ensures that all occurrences of the masked value are replaced. + return msg.replace(new RegExp(escapedMasked, "g"), original); + }, message); + } + + /** + * Default hash function for creating unique hash values. + * @param input - The input string to hash. + * @returns The resulting hash as a string. + */ + private defaultHashFunction(input: string): string { + let hash = 0; + // Iterate over each character in the input string + for (let i = 0; i < input.length; i++) { + // Get ASCII value of the character + const char = input.charCodeAt(i); + // Combine the current hash with the new character and ensure it remains a 32-bit integer + hash = (hash << 5) - hash + char; + // Bitwise OR operation to convert to a 32-bit integer. + // This is a common technique to ensure the final hash value stays within the 32-bit limit, + // effectively wrapping the value when it becomes too large. + hash |= 0; + } + + // Convert the numerical hash value to a string and return + return hash.toString(); + } +} diff --git a/langchain/src/experimental/masking/tests/mask-integration.test.ts b/langchain/src/experimental/masking/tests/mask-integration.test.ts new file mode 100644 index 000000000000..26984c11b227 --- /dev/null +++ b/langchain/src/experimental/masking/tests/mask-integration.test.ts @@ -0,0 +1,73 @@ +// yarn test:single src/experimental/masking/tests/mask-integration.test.ts +import { MaskingParser, PIIMaskingTransformer } from "../index.js"; + +// Mock database for simulating state storage and retrieval +const mockDB = (() => { + const db = new Map(); + return { + async saveState(key: string, serializedState: string) { + db.set(key, serializedState); + }, + async getState(key: string): Promise { + return db.get(key) || ""; + }, + }; +})(); + +function serializeState(state: Map): string { + return JSON.stringify(Array.from(state.entries())); +} + +function deserializeState(serializedState: string): Map { + return new Map(JSON.parse(serializedState)); +} + +describe("MaskingParser Integration Test", () => { + let parser: MaskingParser; + let transformer: PIIMaskingTransformer; + const emailPattern = { regex: /\S+@\S+\.\S+/, replacement: "[email]" }; + const phonePattern = { regex: /\d{3}-\d{3}-\d{4}/, replacement: "[phone]" }; + + beforeEach(() => { + transformer = new PIIMaskingTransformer({ + email: emailPattern, + phone: phonePattern, + }); + + parser = new MaskingParser(); + parser.addTransformer(transformer); + }); + + it("should mask, store state, and rehydrate with altered order", async () => { + const originalMessage = "Contact me at jane.doe@email.com or 555-123-4567."; + const maskedMessage = await parser.parse(originalMessage); + + // Serialize and store the state + const serializedState = serializeState(parser.getState()); + await mockDB.saveState("uniqueMessageId", serializedState); + + // Simulate retrieving and altering the masked message + // Here, we assume the AI processing reverses the order of masked content + // Simulate retrieving and altering the masked message + const alteredMaskedMessage = maskedMessage.split(" ").reverse().join(" "); + + // Retrieve and deserialize the state + const retrievedSerializedState = await mockDB.getState("uniqueMessageId"); + const retrievedState = deserializeState(retrievedSerializedState); + + // Rehydrate the altered message + const rehydratedMessage = await parser.rehydrate( + alteredMaskedMessage, + retrievedState + ); + + // The expectation depends on how the alteration affects the masked message. + // Here, we assume that the rehydrated message should match the original message + // even after the alteration since the masked content still aligns with the stored state. + const expectedRehydratedMessage = originalMessage + .split(" ") + .reverse() + .join(" "); + expect(rehydratedMessage).toEqual(expectedRehydratedMessage); + }); +}); diff --git a/langchain/src/experimental/masking/tests/masking.test.ts b/langchain/src/experimental/masking/tests/masking.test.ts new file mode 100644 index 000000000000..e51ca8371dfc --- /dev/null +++ b/langchain/src/experimental/masking/tests/masking.test.ts @@ -0,0 +1,319 @@ +// yarn test:single src/experimental/masking/tests/masking.test.ts +import { MaskingParser, PIIMaskingTransformer } from "../index.js"; +import { jest } from "@jest/globals"; + +describe("MaskingParser and PIIMaskingTransformer", () => { + describe("Masking with Static Identifiers", () => { + let maskingParser: MaskingParser; + let piiMaskingTransformer: PIIMaskingTransformer; + const emailPattern = { regex: /\S+@\S+\.\S+/, replacement: "[email]" }; + const phonePattern = { regex: /\d{3}-\d{3}-\d{4}/, replacement: "[phone]" }; + + beforeEach(() => { + piiMaskingTransformer = new PIIMaskingTransformer({ + email: emailPattern, + phone: phonePattern, + }); + + maskingParser = new MaskingParser(); + maskingParser.addTransformer(piiMaskingTransformer); + }); + + it("masks single occurrences of PII with static identifiers", async () => { + const message = "Contact me at jane.doe@email.com or 555-123-4567."; + const expectedMaskedMessage = "Contact me at [email] or [phone]."; + + const maskedMessage = await maskingParser.parse(message); + + expect(maskedMessage).toBe(expectedMaskedMessage); + }); + + it("rehydrates static masked data to its original form", async () => { + const maskedMessage = "Contact me at [email] or [phone]."; + const expectedOriginalMessage = + "Contact me at jane.doe@email.com or 555-123-4567."; + + await maskingParser.parse(expectedOriginalMessage); // Masking original message + const rehydratedMessage = await maskingParser.rehydrate(maskedMessage); + + expect(rehydratedMessage).toBe(expectedOriginalMessage); + }); + }); + + describe("Masking with Dynamic Identifiers", () => { + let maskingParser: MaskingParser; + let piiMaskingTransformer: PIIMaskingTransformer; + const emailMask = (match: string) => + `[email-${Math.random().toString(16).slice(2)}]`; + const phoneMask = (match: string) => + `[phone-${Math.random().toString(16).slice(2)}]`; + + beforeEach(() => { + piiMaskingTransformer = new PIIMaskingTransformer({ + email: { regex: /\S+@\S+\.\S+/g, mask: emailMask }, + phone: { regex: /\d{3}-\d{3}-\d{4}/g, mask: phoneMask }, + }); + + maskingParser = new MaskingParser(); + maskingParser.addTransformer(piiMaskingTransformer); + }); + + it("masks multiple occurrences of different PII with unique identifiers", async () => { + const message = + "Contact me at jane.doe@email.com or 555-123-4567. Also reach me at john.smith@email.com"; + const maskedMessage = await maskingParser.parse(message); + + expect(maskedMessage).toMatch(/\[email-[a-f0-9]+\]/g); + expect(maskedMessage).toMatch(/\[phone-[a-f0-9]+\]/g); + expect((maskedMessage.match(/\[email-[a-f0-9]+\]/g) || []).length).toBe( + 2 + ); + expect((maskedMessage.match(/\[phone-[a-f0-9]+\]/g) || []).length).toBe( + 1 + ); + }); + + it("rehydrates dynamic masked data to its original form", async () => { + const originalMessage = + "Contact me at jane.doe@email.com or 555-123-4567. Also reach me at john.smith@email.com"; + const maskedMessage = await maskingParser.parse(originalMessage); + const rehydratedMessage = await maskingParser.rehydrate(maskedMessage); + + expect(rehydratedMessage).toBe(originalMessage); + }); + + it("masks identical PII with consistent dynamic identifiers", async () => { + const message = + "Contact me at jane.doe@email.com or 555-123-4567. Also reach me at john.smith@email.com and 555-123-4567"; + const maskedMessage = await maskingParser.parse(message); + + expect(maskedMessage).toMatch(/\[email-[a-f0-9]+\]/g); + expect(maskedMessage).toMatch(/\[phone-[a-f0-9]+\]/g); + expect((maskedMessage.match(/\[email-[a-f0-9]+\]/g) || []).length).toBe( + 2 + ); + expect((maskedMessage.match(/\[phone-[a-f0-9]+\]/g) || []).length).toBe( + 2 + ); + }); + }); + + describe("PIIMaskingTransformer with Default Hash Function", () => { + let maskingParser: MaskingParser; + let piiMaskingTransformer: PIIMaskingTransformer; + const emailPattern = { regex: /\S+@\S+\.\S+/, replacement: "[email]" }; + const phonePattern = { regex: /\d{3}-\d{3}-\d{4}/, replacement: "[phone]" }; + + beforeEach(() => { + piiMaskingTransformer = new PIIMaskingTransformer({ + email: emailPattern, + phone: phonePattern, + }); + + maskingParser = new MaskingParser(); + maskingParser.addTransformer(piiMaskingTransformer); + }); + + it("should mask email and phone using default hash function", async () => { + const piiMaskingTransformer = new PIIMaskingTransformer({ + email: emailPattern, + phone: phonePattern, + }); + const maskingParser = new MaskingParser(); + maskingParser.addTransformer(piiMaskingTransformer); + + const message = + "My email is jane.doe@email.com and phone is 555-123-4567."; + const maskedMessage = await maskingParser.parse(message); + + expect(maskedMessage).toContain("[email]"); + expect(maskedMessage).toContain("[phone]"); + }); + }); + + describe("PIIMaskingTransformer with Custom Hash Function", () => { + const emailPattern = { regex: /\S+@\S+\.\S+/, replacement: "[email]" }; + const phonePattern = { regex: /\d{3}-\d{3}-\d{4}/, replacement: "[phone]" }; + + let maskingParser: MaskingParser; + let piiMaskingTransformer: PIIMaskingTransformer; + + beforeEach(() => { + piiMaskingTransformer = new PIIMaskingTransformer({ + email: emailPattern, + phone: phonePattern, + }); + + maskingParser = new MaskingParser(); + maskingParser.addTransformer(piiMaskingTransformer); + }); + + const customHashFunction = (input: string) => { + // Simple custom hash function for testing purposes + return `hashed-${input}`; + }; + + it("should mask email and phone using custom hash function", async () => { + const piiMaskingTransformer = new PIIMaskingTransformer( + { + email: { + regex: /\S+@\S+\.\S+/, + mask: (match) => `custom-email-${customHashFunction(match)}`, + }, + phone: { + regex: /\d{3}-\d{3}-\d{4}/, + mask: (match) => `custom-phone-${customHashFunction(match)}`, + }, + }, + customHashFunction + ); + + const maskingParser = new MaskingParser(); + maskingParser.addTransformer(piiMaskingTransformer); + + const message = "Contact me at jane.doe@email.com or 555-123-4567."; + const maskedMessage = await maskingParser.parse(message); + + expect(maskedMessage).toContain("custom-email-hashed-jane.doe@email.com"); + expect(maskedMessage).toContain("custom-phone-hashed-555-123-4567"); + }); + + it("should rehydrate masked data correctly using custom hash function", async () => { + const piiMaskingTransformer = new PIIMaskingTransformer( + { + email: { + regex: /\S+@\S+\.\S+/, + mask: (match) => `custom-email-${customHashFunction(match)}`, + }, + phone: { + regex: /\d{3}-\d{3}-\d{4}/, + mask: (match) => `custom-phone-${customHashFunction(match)}`, + }, + }, + customHashFunction + ); + + maskingParser.addTransformer(piiMaskingTransformer); + + const originalMessage = + "Contact me at jane.doe@email.com or 555-123-4567."; + const maskedMessage = await maskingParser.parse(originalMessage); + const rehydratedMessage = await maskingParser.rehydrate(maskedMessage); + + expect(rehydratedMessage).toBe(originalMessage); + }); + }); + + describe("Error Handling in MaskingParser", () => { + let maskingParser: MaskingParser; + let piiMaskingTransformer: PIIMaskingTransformer; + + beforeEach(() => { + piiMaskingTransformer = new PIIMaskingTransformer({}); + maskingParser = new MaskingParser(); + }); + + it("throws an error when no transformers are added and parse is called", async () => { + const message = "Some message"; + await expect(maskingParser.parse(message)).rejects.toThrow( + "MaskingParser.parse Error: No transformers have been added. Please add at least one transformer before parsing." + ); + }); + + it("throws an error when no transformers are added and rehydrate is called", async () => { + const message = "Some masked message"; + await expect(maskingParser.rehydrate(message)).rejects.toThrow( + "MaskingParser.rehydrate Error: No transformers have been added. Please add at least one transformer before rehydrating." + ); + }); + + it("throws an error for invalid message type in parse", async () => { + const invalidMessage: any = 123; // intentionally incorrect type + maskingParser.addTransformer(piiMaskingTransformer); // Add a transformer + await expect(maskingParser.parse(invalidMessage)).rejects.toThrow( + "The 'message' argument must be a string." + ); + }); + + it("throws an error for invalid message type in rehydrate", async () => { + const invalidMessage: any = 123; // intentionally incorrect type + await expect(maskingParser.rehydrate(invalidMessage)).rejects.toThrow( + "The 'message' argument must be a string." + ); + }); + }); + + describe("Error Handling in PIIMaskingTransformer", () => { + it("throws an error for invalid message type in transform", () => { + const transformer = new PIIMaskingTransformer({}); + const invalidMessage: any = 123; // intentionally incorrect type + const state = new Map(); + expect(() => transformer.transform(invalidMessage, state)).toThrow( + "The 'message' argument must be a string." + ); + }); + + it("throws an error for invalid state type in transform", () => { + const transformer = new PIIMaskingTransformer({}); + const message = "Some message"; + const invalidState: any = {}; // intentionally incorrect type + expect(() => transformer.transform(message, invalidState)).toThrow( + "The 'state' argument must be an instance of Map." + ); + }); + + it("throws an error when initialized with invalid regex pattern", () => { + expect(() => { + // @ts-expect-error + new PIIMaskingTransformer({ invalid: { regex: null } }); + }).toThrow("Invalid pattern configuration."); + }); + }); + + describe("MaskingParser Hooks", () => { + let maskingParser: MaskingParser; + let piiMaskingTransformer: PIIMaskingTransformer; + const emailPattern = { regex: /\S+@\S+\.\S+/, replacement: "[email]" }; + + beforeEach(() => { + piiMaskingTransformer = new PIIMaskingTransformer({ + email: emailPattern, + }); + }); + + it("calls onMaskingStart and onMaskingEnd hooks during parse", async () => { + const onMaskingStart = jest.fn(); + const onMaskingEnd = jest.fn(); + + maskingParser = new MaskingParser({ + transformers: [piiMaskingTransformer], + onMaskingStart, + onMaskingEnd, + }); + + const message = "Contact me at jane.doe@email.com"; + await maskingParser.parse(message); + + expect(onMaskingStart).toHaveBeenCalledWith(message); + expect(onMaskingEnd).toHaveBeenCalled(); + }); + + it("calls onRehydratingStart and onRehydratingEnd hooks during rehydrate", async () => { + const onRehydratingStart = jest.fn(); + const onRehydratingEnd = jest.fn(); + + maskingParser = new MaskingParser({ + transformers: [piiMaskingTransformer], + onRehydratingStart, + onRehydratingEnd, + }); + + const message = "Contact me at [email]"; + await maskingParser.parse(message); // necessary to populate the state + await maskingParser.rehydrate(message); + + expect(onRehydratingStart).toHaveBeenCalledWith(message); + expect(onRehydratingEnd).toHaveBeenCalled(); + }); + }); +}); diff --git a/langchain/src/experimental/masking/transformer.ts b/langchain/src/experimental/masking/transformer.ts new file mode 100644 index 000000000000..2ae89cb856d3 --- /dev/null +++ b/langchain/src/experimental/masking/transformer.ts @@ -0,0 +1,10 @@ +/** + * Abstract class representing a transformer used for masking and rehydrating messages. + */ +export abstract class MaskingTransformer { + abstract transform( + message: string, + state?: Map + ): [string, Map]; + abstract rehydrate(message: string, state: Map): string; +} diff --git a/langchain/src/experimental/masking/types.ts b/langchain/src/experimental/masking/types.ts new file mode 100644 index 000000000000..4bc475f768c2 --- /dev/null +++ b/langchain/src/experimental/masking/types.ts @@ -0,0 +1,29 @@ +import { MaskingTransformer } from "./transformer.js"; +/** + * Configuration type for MaskingParser. + */ + +export type MaskingParserConfig = { + transformers?: MaskingTransformer[]; + defaultHashFunction?: HashFunction; + onMaskingStart?: HookFunction; + onMaskingEnd?: HookFunction; + onRehydratingStart?: HookFunction; + onRehydratingEnd?: HookFunction; +}; + +/** + * Regex Masking Pattern used for masking in PIIMaskingTransformer. + */ +export type MaskingPattern = { + regex: RegExp; + replacement?: string; + mask?: (match: string) => string; +}; + +export type HookFunction = (message: string) => void; + +/** + * Represents a function that can hash a string input. + */ +export type HashFunction = (input: string) => string; From 699722215b5e9170dae6d71ff60e9a30e5ce6675 Mon Sep 17 00:00:00 2001 From: Jordan Date: Tue, 28 Nov 2023 15:39:05 -0500 Subject: [PATCH 02/14] test: add perf unit test --- .../masking/tests/masking.test.ts | 44 ++++++++++++++++--- 1 file changed, 39 insertions(+), 5 deletions(-) diff --git a/langchain/src/experimental/masking/tests/masking.test.ts b/langchain/src/experimental/masking/tests/masking.test.ts index e51ca8371dfc..859476059a2a 100644 --- a/langchain/src/experimental/masking/tests/masking.test.ts +++ b/langchain/src/experimental/masking/tests/masking.test.ts @@ -38,6 +38,31 @@ describe("MaskingParser and PIIMaskingTransformer", () => { expect(rehydratedMessage).toBe(expectedOriginalMessage); }); + + function generateLargeMessage() { + let largeMessage = ""; + for (let i = 0; i < 10000; i++) { + // Adjust the number for desired message size + largeMessage += `User${i}: jane.doe${i}@email.com, 555-123-${i + .toString() + .padStart(4, "0")}. `; + } + return largeMessage; + } + + describe("Performance Testing", () => { + it("efficiently processes large data sets", async () => { + const largeMessage = generateLargeMessage(); + const startTime = performance.now(); + const maskedMessage = await maskingParser.parse(largeMessage); + const endTime = performance.now(); + + const someAcceptableDuration = 5000; // Set this to a duration you consider acceptable, e.g., 5000 milliseconds (5 seconds) + + expect(maskedMessage).toBeDefined(); + expect(endTime - startTime).toBeLessThan(someAcceptableDuration); + }); + }); }); describe("Masking with Dynamic Identifiers", () => { @@ -149,10 +174,13 @@ describe("MaskingParser and PIIMaskingTransformer", () => { }); const customHashFunction = (input: string) => { - // Simple custom hash function for testing purposes - return `hashed-${input}`; + // A simple hash function that creates a mock hash representation of the input. + // This is just for demonstration purposes and not a secure hashing method. + return input + .split("") + .map((char) => "*") + .join(""); }; - it("should mask email and phone using custom hash function", async () => { const piiMaskingTransformer = new PIIMaskingTransformer( { @@ -174,8 +202,14 @@ describe("MaskingParser and PIIMaskingTransformer", () => { const message = "Contact me at jane.doe@email.com or 555-123-4567."; const maskedMessage = await maskingParser.parse(message); - expect(maskedMessage).toContain("custom-email-hashed-jane.doe@email.com"); - expect(maskedMessage).toContain("custom-phone-hashed-555-123-4567"); + // The lengths of the masked parts should be equal to the lengths of the original email and phone number. + const expectedEmailMask = + "custom-email-" + "*".repeat("jane.doe@email.com".length); + const expectedPhoneMask = + "custom-phone-" + "*".repeat("555-123-4567".length); + + expect(maskedMessage).toContain(expectedEmailMask); + expect(maskedMessage).toContain(expectedPhoneMask); }); it("should rehydrate masked data correctly using custom hash function", async () => { From aeb37d7cf4b0c5599f7a09177f47e4c696a09036 Mon Sep 17 00:00:00 2001 From: Jordan Date: Wed, 29 Nov 2023 17:10:16 -0500 Subject: [PATCH 03/14] fix: rename piitransformer to regextransformer --- langchain/src/experimental/masking/index.ts | 2 +- ...former.ts => regex_masking_transformer.ts} | 14 +++---- .../masking/tests/mask-integration.test.ts | 6 +-- .../masking/tests/masking.test.ts | 38 +++++++++---------- 4 files changed, 30 insertions(+), 30 deletions(-) rename langchain/src/experimental/masking/{pii_masking_transformer.ts => regex_masking_transformer.ts} (90%) diff --git a/langchain/src/experimental/masking/index.ts b/langchain/src/experimental/masking/index.ts index ebb4615ee182..d8b07444e1d5 100644 --- a/langchain/src/experimental/masking/index.ts +++ b/langchain/src/experimental/masking/index.ts @@ -1,5 +1,5 @@ export { MaskingParser } from "./parser.js"; -export { PIIMaskingTransformer } from "./pii_masking_transformer.js"; +export { RegexMaskingTransformer } from "./regex_masking_transformer.js"; export { type MaskingParserConfig, type HashFunction, diff --git a/langchain/src/experimental/masking/pii_masking_transformer.ts b/langchain/src/experimental/masking/regex_masking_transformer.ts similarity index 90% rename from langchain/src/experimental/masking/pii_masking_transformer.ts rename to langchain/src/experimental/masking/regex_masking_transformer.ts index 80d743138de9..775cfaa21275 100644 --- a/langchain/src/experimental/masking/pii_masking_transformer.ts +++ b/langchain/src/experimental/masking/regex_masking_transformer.ts @@ -1,14 +1,14 @@ import { MaskingTransformer } from "./transformer.js"; import { HashFunction, MaskingPattern } from "./types.js"; /** - * PIIMaskingTransformer class for masking and rehydrating messages with PII. + * RegexMaskingTransformer class for masking and rehydrating messages with Regex. */ -export class PIIMaskingTransformer extends MaskingTransformer { +export class RegexMaskingTransformer extends MaskingTransformer { private patterns: { [key: string]: MaskingPattern }; private hashFunction: HashFunction; /** - * Constructs a PIIMaskingTransformer with given patterns and an optional hash function. + * Constructs a RegexMaskingTransformer with given patterns and an optional hash function. * Validates the provided patterns to ensure they conform to the expected structure. * * @param patterns - An object containing masking patterns. Each pattern should include @@ -64,13 +64,13 @@ export class PIIMaskingTransformer extends MaskingTransformer { ): [string, Map] { if (typeof message !== "string") { throw new TypeError( - "PIIMaskingTransformer.transform Error: The 'message' argument must be a string." + "RegexMaskingTransformer.transform Error: The 'message' argument must be a string." ); } if (!(state instanceof Map)) { throw new TypeError( - "PIIMaskingTransformer.transform Error: The 'state' argument must be an instance of Map." + "RegexMaskingTransformer.transform Error: The 'state' argument must be an instance of Map." ); } @@ -113,13 +113,13 @@ export class PIIMaskingTransformer extends MaskingTransformer { rehydrate(message: string, state: Map): string { if (typeof message !== "string") { throw new TypeError( - "PIIMaskingTransformer.rehydrate Error: The 'message' argument must be a string." + "RegexMaskingTransformer.rehydrate Error: The 'message' argument must be a string." ); } if (!(state instanceof Map)) { throw new TypeError( - "PIIMaskingTransformer.rehydrate Error: The 'state' argument must be an instance of Map." + "RegexMaskingTransformer.rehydrate Error: The 'state' argument must be an instance of Map." ); } diff --git a/langchain/src/experimental/masking/tests/mask-integration.test.ts b/langchain/src/experimental/masking/tests/mask-integration.test.ts index 26984c11b227..d0b80d1738da 100644 --- a/langchain/src/experimental/masking/tests/mask-integration.test.ts +++ b/langchain/src/experimental/masking/tests/mask-integration.test.ts @@ -1,5 +1,5 @@ // yarn test:single src/experimental/masking/tests/mask-integration.test.ts -import { MaskingParser, PIIMaskingTransformer } from "../index.js"; +import { MaskingParser, RegexMaskingTransformer } from "../index.js"; // Mock database for simulating state storage and retrieval const mockDB = (() => { @@ -24,12 +24,12 @@ function deserializeState(serializedState: string): Map { describe("MaskingParser Integration Test", () => { let parser: MaskingParser; - let transformer: PIIMaskingTransformer; + let transformer: RegexMaskingTransformer; const emailPattern = { regex: /\S+@\S+\.\S+/, replacement: "[email]" }; const phonePattern = { regex: /\d{3}-\d{3}-\d{4}/, replacement: "[phone]" }; beforeEach(() => { - transformer = new PIIMaskingTransformer({ + transformer = new RegexMaskingTransformer({ email: emailPattern, phone: phonePattern, }); diff --git a/langchain/src/experimental/masking/tests/masking.test.ts b/langchain/src/experimental/masking/tests/masking.test.ts index 859476059a2a..e8e103540272 100644 --- a/langchain/src/experimental/masking/tests/masking.test.ts +++ b/langchain/src/experimental/masking/tests/masking.test.ts @@ -1,16 +1,16 @@ // yarn test:single src/experimental/masking/tests/masking.test.ts -import { MaskingParser, PIIMaskingTransformer } from "../index.js"; +import { MaskingParser, RegexMaskingTransformer } from "../index.js"; import { jest } from "@jest/globals"; describe("MaskingParser and PIIMaskingTransformer", () => { describe("Masking with Static Identifiers", () => { let maskingParser: MaskingParser; - let piiMaskingTransformer: PIIMaskingTransformer; + let piiMaskingTransformer: RegexMaskingTransformer; const emailPattern = { regex: /\S+@\S+\.\S+/, replacement: "[email]" }; const phonePattern = { regex: /\d{3}-\d{3}-\d{4}/, replacement: "[phone]" }; beforeEach(() => { - piiMaskingTransformer = new PIIMaskingTransformer({ + piiMaskingTransformer = new RegexMaskingTransformer({ email: emailPattern, phone: phonePattern, }); @@ -67,14 +67,14 @@ describe("MaskingParser and PIIMaskingTransformer", () => { describe("Masking with Dynamic Identifiers", () => { let maskingParser: MaskingParser; - let piiMaskingTransformer: PIIMaskingTransformer; + let piiMaskingTransformer: RegexMaskingTransformer; const emailMask = (match: string) => `[email-${Math.random().toString(16).slice(2)}]`; const phoneMask = (match: string) => `[phone-${Math.random().toString(16).slice(2)}]`; beforeEach(() => { - piiMaskingTransformer = new PIIMaskingTransformer({ + piiMaskingTransformer = new RegexMaskingTransformer({ email: { regex: /\S+@\S+\.\S+/g, mask: emailMask }, phone: { regex: /\d{3}-\d{3}-\d{4}/g, mask: phoneMask }, }); @@ -125,12 +125,12 @@ describe("MaskingParser and PIIMaskingTransformer", () => { describe("PIIMaskingTransformer with Default Hash Function", () => { let maskingParser: MaskingParser; - let piiMaskingTransformer: PIIMaskingTransformer; + let piiMaskingTransformer: RegexMaskingTransformer; const emailPattern = { regex: /\S+@\S+\.\S+/, replacement: "[email]" }; const phonePattern = { regex: /\d{3}-\d{3}-\d{4}/, replacement: "[phone]" }; beforeEach(() => { - piiMaskingTransformer = new PIIMaskingTransformer({ + piiMaskingTransformer = new RegexMaskingTransformer({ email: emailPattern, phone: phonePattern, }); @@ -140,7 +140,7 @@ describe("MaskingParser and PIIMaskingTransformer", () => { }); it("should mask email and phone using default hash function", async () => { - const piiMaskingTransformer = new PIIMaskingTransformer({ + const piiMaskingTransformer = new RegexMaskingTransformer({ email: emailPattern, phone: phonePattern, }); @@ -161,10 +161,10 @@ describe("MaskingParser and PIIMaskingTransformer", () => { const phonePattern = { regex: /\d{3}-\d{3}-\d{4}/, replacement: "[phone]" }; let maskingParser: MaskingParser; - let piiMaskingTransformer: PIIMaskingTransformer; + let piiMaskingTransformer: RegexMaskingTransformer; beforeEach(() => { - piiMaskingTransformer = new PIIMaskingTransformer({ + piiMaskingTransformer = new RegexMaskingTransformer({ email: emailPattern, phone: phonePattern, }); @@ -182,7 +182,7 @@ describe("MaskingParser and PIIMaskingTransformer", () => { .join(""); }; it("should mask email and phone using custom hash function", async () => { - const piiMaskingTransformer = new PIIMaskingTransformer( + const piiMaskingTransformer = new RegexMaskingTransformer( { email: { regex: /\S+@\S+\.\S+/, @@ -213,7 +213,7 @@ describe("MaskingParser and PIIMaskingTransformer", () => { }); it("should rehydrate masked data correctly using custom hash function", async () => { - const piiMaskingTransformer = new PIIMaskingTransformer( + const piiMaskingTransformer = new RegexMaskingTransformer( { email: { regex: /\S+@\S+\.\S+/, @@ -240,10 +240,10 @@ describe("MaskingParser and PIIMaskingTransformer", () => { describe("Error Handling in MaskingParser", () => { let maskingParser: MaskingParser; - let piiMaskingTransformer: PIIMaskingTransformer; + let piiMaskingTransformer: RegexMaskingTransformer; beforeEach(() => { - piiMaskingTransformer = new PIIMaskingTransformer({}); + piiMaskingTransformer = new RegexMaskingTransformer({}); maskingParser = new MaskingParser(); }); @@ -279,7 +279,7 @@ describe("MaskingParser and PIIMaskingTransformer", () => { describe("Error Handling in PIIMaskingTransformer", () => { it("throws an error for invalid message type in transform", () => { - const transformer = new PIIMaskingTransformer({}); + const transformer = new RegexMaskingTransformer({}); const invalidMessage: any = 123; // intentionally incorrect type const state = new Map(); expect(() => transformer.transform(invalidMessage, state)).toThrow( @@ -288,7 +288,7 @@ describe("MaskingParser and PIIMaskingTransformer", () => { }); it("throws an error for invalid state type in transform", () => { - const transformer = new PIIMaskingTransformer({}); + const transformer = new RegexMaskingTransformer({}); const message = "Some message"; const invalidState: any = {}; // intentionally incorrect type expect(() => transformer.transform(message, invalidState)).toThrow( @@ -299,18 +299,18 @@ describe("MaskingParser and PIIMaskingTransformer", () => { it("throws an error when initialized with invalid regex pattern", () => { expect(() => { // @ts-expect-error - new PIIMaskingTransformer({ invalid: { regex: null } }); + new RegexMaskingTransformer({ invalid: { regex: null } }); }).toThrow("Invalid pattern configuration."); }); }); describe("MaskingParser Hooks", () => { let maskingParser: MaskingParser; - let piiMaskingTransformer: PIIMaskingTransformer; + let piiMaskingTransformer: RegexMaskingTransformer; const emailPattern = { regex: /\S+@\S+\.\S+/, replacement: "[email]" }; beforeEach(() => { - piiMaskingTransformer = new PIIMaskingTransformer({ + piiMaskingTransformer = new RegexMaskingTransformer({ email: emailPattern, }); }); From d85d15d0ffd725d941e6e7057898708e2371a66c Mon Sep 17 00:00:00 2001 From: Dzmitry Dubarau Date: Fri, 1 Dec 2023 14:42:16 -0500 Subject: [PATCH 04/14] added example Kitchen Sink for masking parser --- .../src/experimental/masking/kitchen_sink.ts | 80 +++++++++++++++++++ 1 file changed, 80 insertions(+) create mode 100644 examples/src/experimental/masking/kitchen_sink.ts diff --git a/examples/src/experimental/masking/kitchen_sink.ts b/examples/src/experimental/masking/kitchen_sink.ts new file mode 100644 index 000000000000..808554d0b37c --- /dev/null +++ b/examples/src/experimental/masking/kitchen_sink.ts @@ -0,0 +1,80 @@ +import { + MaskingParser, + RegexMaskingTransformer, + } from "langchain/experimental/masking"; + + // A simple hash function for demonstration purposes + function simpleHash(input: string): string { + let hash = 0; + for (let i = 0; i < input.length; i++) { + const char = input.charCodeAt(i); + hash = (hash << 5) - hash + char; + hash |= 0; // Convert to 32bit integer + } + return hash.toString(16); + } + + const emailMask = (match: string) => `[email-${simpleHash(match)}]`; + const phoneMask = (match: string) => `[phone-${simpleHash(match)}]`; + const nameMask = (match: string) => `[name-${simpleHash(match)}]`; + const ssnMask = (match: string) => `[ssn-${simpleHash(match)}]`; + const creditCardMask = (match: string) => `[creditcard-${simpleHash(match)}]`; + const passportMask = (match: string) => `[passport-${simpleHash(match)}]`; + const licenseMask = (match: string) => `[license-${simpleHash(match)}]`; + const addressMask = (match: string) => `[address-${simpleHash(match)}]`; + const dobMask = (match: string) => `[dob-${simpleHash(match)}]`; + const bankAccountMask = (match: string) => `[bankaccount-${simpleHash(match)}]`; + + // Regular expressions for different types of PII + const patterns = { + email: { regex: /\S+@\S+\.\S+/g, mask: emailMask }, + phone: { regex: /\b\d{3}-\d{3}-\d{4}\b/g, mask: phoneMask }, + name: { regex: /\b[A-Z][a-z]+ [A-Z][a-z]+\b/g, mask: nameMask }, + ssn: { regex: /\b\d{3}-\d{2}-\d{4}\b/g, mask: ssnMask }, + creditCard: { regex: /\b(?:\d{4}[ -]?){3}\d{4}\b/g, mask: creditCardMask }, + passport: { regex: /(?i)\b[A-Z]{1,2}\d{6,9}\b/g, mask: passportMask }, + license: { regex: /(?i)\b[A-Z]{1,2}\d{6,8}\b/g, mask: licenseMask }, + address: { + regex: /\b\d{1,5}\s[A-Z][a-z]+(?:\s[A-Z][a-z]+)\*\b/g, + mask: addressMask, + }, + dob: { regex: /\b\d{4}-\d{2}-\d{2}\b/g, mask: dobMask }, + bankAccount: { regex: /\b\d{8,17}\b/g, mask: bankAccountMask }, + }; + + // Create a RegexMaskingTransformer with multiple patterns + const piiMaskingTransformer = new RegexMaskingTransformer(patterns); + + // Hooks for different stages of masking and rehydrating + const onMaskingStart = (message: string) => + console.log(`Starting to mask message: ${message}`); + const onMaskingEnd = (maskedMessage: string) => + console.log(`Masked message: ${maskedMessage}`); + const onRehydratingStart = (message: string) => + console.log(`Starting to rehydrate message: ${message}`); + const onRehydratingEnd = (rehydratedMessage: string) => + console.log(`Rehydrated message: ${rehydratedMessage}`); + + // Initialize MaskingParser with the transformer and hooks + const maskingParser = new MaskingParser({ + transformers: [piiMaskingTransformer], + onMaskingStart, + onMaskingEnd, + onRehydratingStart, + onRehydratingEnd, + }); + + // Example message containing multiple types of PII + const message = + "Contact Jane Doe at jane.doe@email.com or 555-123-4567. Her SSN is 123-45-6789 and her credit card number is 1234-5678-9012-3456. Passport number: AB1234567, Driver's License: X1234567, Address: 123 Main St, Date of Birth: 1990-01-01, Bank Account: 12345678901234567."; + + // Mask and rehydrate the message + maskingParser + .parse(message) + .then((maskedMessage: string) => { + console.log(`Masked message: ${maskedMessage}`); + return maskingParser.rehydrate(maskedMessage); + }) + .then((rehydratedMessage: string) => { + console.log(`Final rehydrated message: ${rehydratedMessage}`); + }); \ No newline at end of file From af49c2b8d53416a0123e74f186b7fef0a3925a5b Mon Sep 17 00:00:00 2001 From: Jordan Date: Mon, 4 Dec 2023 13:29:17 -0500 Subject: [PATCH 05/14] docs: Add documentation, nextjs example and kitchen sink example --- .../docs/modules/experimental/index.mdx | 5 + .../docs/modules/experimental/mask/mask.mdx | 64 ++++++++ docs/core_docs/docs/modules/index.mdx | 4 + .../src/experimental/masking/kitchen_sink.ts | 142 +++++++++--------- examples/src/experimental/masking/next.ts | 73 +++++++++ .../masking/tests/masking.test.ts | 8 +- 6 files changed, 220 insertions(+), 76 deletions(-) create mode 100644 docs/core_docs/docs/modules/experimental/index.mdx create mode 100644 docs/core_docs/docs/modules/experimental/mask/mask.mdx create mode 100644 examples/src/experimental/masking/next.ts diff --git a/docs/core_docs/docs/modules/experimental/index.mdx b/docs/core_docs/docs/modules/experimental/index.mdx new file mode 100644 index 000000000000..c438bd88faba --- /dev/null +++ b/docs/core_docs/docs/modules/experimental/index.mdx @@ -0,0 +1,5 @@ +--- +sidebar_position: 6 +--- + +# Experimental diff --git a/docs/core_docs/docs/modules/experimental/mask/mask.mdx b/docs/core_docs/docs/modules/experimental/mask/mask.mdx new file mode 100644 index 000000000000..d1ffd31da0d8 --- /dev/null +++ b/docs/core_docs/docs/modules/experimental/mask/mask.mdx @@ -0,0 +1,64 @@ +# Masking + +The experimental masking parser and transformer is an extendable module for masking and rehydrating strings. One of the primary use cases for this module is to redact PII (Personal Identifiable Information) from a string before making a call to an llm. + +### Real world scenario + +A customer support system receives messages containing sensitive customer information. The system must parse these messages, mask any PII (like names, email addresses, and phone numbers), and log them for analysis while complying with privacy regulations. Before logging the transcript a summary is generated using an llm. + +## Example + +Use the RegexMaskingTransformer to create a simple mask for email and phone. + +```typescript +import { + MaskingParser, + RegexMaskingTransformer, +} from "langchain/experimental/masking"; + +// Define masking strategy +const emailMask = () => `[email-${Math.random().toString(16).slice(2)}]`; +const phoneMask = () => `[phone-${Math.random().toString(16).slice(2)}]`; + +// Configure pii transformer +const piiMaskingTransformer = new RegexMaskingTransformer({ + email: { regex: /\S+@\S+\.\S+/g, mask: emailMask }, + phone: { regex: /\d{3}-\d{3}-\d{4}/g, mask: phoneMask }, +}); + +maskingParser = new MaskingParser(); +maskingParser.addTransformer(piiMaskingTransformer); + +const input = + "Contact me at jane.doe@email.com or 555-123-4567. Also reach me at john.smith@email.com"; +const masked = await maskingParser.parse(message); + +console.log(masked); +// Contact me at [email-a31e486e324f6] or [phone-da8fc1584f224]. Also reach me at [email-d5b6237633d95] + +const rehydrated = maskingParser.rehydrate(masked); +console.log(rehydrated); +// Contact me at jane.doe@email.com or 555-123-4567. Also reach me at john.smith@email.com +``` + +:::note +If you plan on storing the masking state to rehydrate the original values asynchronously ensure you are following best security practices. In most cases you will want to define a custom hashing and salting strategy. +::: + +## Get started + +import CodeBlock from "@theme/CodeBlock"; +import ExampleKitchenSink from "@examples/experimental/masking/kitchen_sink.ts"; +import ExampleNext from "@examples/experimental/masking/next.ts"; +import ExampleStream from "@examples/chains/llm_chain_stream.ts"; +import ExampleCancellation from "@examples/chains/llm_chain_cancellation.ts"; + +### Next.js stream + +Example nextjs chat endpoint leveraging the RegexMaskingTransformer. The current chat message and chat message history are masked every time the api is called with a chat payload. + +{ExampleNext} + +### Kitchen sink + +{ExampleKitchenSink} diff --git a/docs/core_docs/docs/modules/index.mdx b/docs/core_docs/docs/modules/index.mdx index dfae71f5175a..8e8bd58c0e91 100644 --- a/docs/core_docs/docs/modules/index.mdx +++ b/docs/core_docs/docs/modules/index.mdx @@ -29,3 +29,7 @@ Persist application state between runs of a chain #### [Callbacks](/docs/modules/callbacks/) Log and stream intermediate steps of any chain + +#### [Experimental](/docs/modules/experimental/) + +Experimental modules not yet ready for production consumption diff --git a/examples/src/experimental/masking/kitchen_sink.ts b/examples/src/experimental/masking/kitchen_sink.ts index 808554d0b37c..f242848e8139 100644 --- a/examples/src/experimental/masking/kitchen_sink.ts +++ b/examples/src/experimental/masking/kitchen_sink.ts @@ -1,80 +1,80 @@ import { - MaskingParser, - RegexMaskingTransformer, - } from "langchain/experimental/masking"; - - // A simple hash function for demonstration purposes - function simpleHash(input: string): string { - let hash = 0; - for (let i = 0; i < input.length; i++) { + MaskingParser, + RegexMaskingTransformer, +} from "langchain/experimental/masking"; + +// A simple hash function for demonstration purposes +function simpleHash(input: string): string { + let hash = 0; + for (let i = 0; i < input.length; i++) { const char = input.charCodeAt(i); hash = (hash << 5) - hash + char; hash |= 0; // Convert to 32bit integer - } - return hash.toString(16); - } - - const emailMask = (match: string) => `[email-${simpleHash(match)}]`; - const phoneMask = (match: string) => `[phone-${simpleHash(match)}]`; - const nameMask = (match: string) => `[name-${simpleHash(match)}]`; - const ssnMask = (match: string) => `[ssn-${simpleHash(match)}]`; - const creditCardMask = (match: string) => `[creditcard-${simpleHash(match)}]`; - const passportMask = (match: string) => `[passport-${simpleHash(match)}]`; - const licenseMask = (match: string) => `[license-${simpleHash(match)}]`; - const addressMask = (match: string) => `[address-${simpleHash(match)}]`; - const dobMask = (match: string) => `[dob-${simpleHash(match)}]`; - const bankAccountMask = (match: string) => `[bankaccount-${simpleHash(match)}]`; - - // Regular expressions for different types of PII - const patterns = { - email: { regex: /\S+@\S+\.\S+/g, mask: emailMask }, - phone: { regex: /\b\d{3}-\d{3}-\d{4}\b/g, mask: phoneMask }, - name: { regex: /\b[A-Z][a-z]+ [A-Z][a-z]+\b/g, mask: nameMask }, - ssn: { regex: /\b\d{3}-\d{2}-\d{4}\b/g, mask: ssnMask }, - creditCard: { regex: /\b(?:\d{4}[ -]?){3}\d{4}\b/g, mask: creditCardMask }, - passport: { regex: /(?i)\b[A-Z]{1,2}\d{6,9}\b/g, mask: passportMask }, - license: { regex: /(?i)\b[A-Z]{1,2}\d{6,8}\b/g, mask: licenseMask }, - address: { + } + return hash.toString(16); +} + +const emailMask = (match: string) => `[email-${simpleHash(match)}]`; +const phoneMask = (match: string) => `[phone-${simpleHash(match)}]`; +const nameMask = (match: string) => `[name-${simpleHash(match)}]`; +const ssnMask = (match: string) => `[ssn-${simpleHash(match)}]`; +const creditCardMask = (match: string) => `[creditcard-${simpleHash(match)}]`; +const passportMask = (match: string) => `[passport-${simpleHash(match)}]`; +const licenseMask = (match: string) => `[license-${simpleHash(match)}]`; +const addressMask = (match: string) => `[address-${simpleHash(match)}]`; +const dobMask = (match: string) => `[dob-${simpleHash(match)}]`; +const bankAccountMask = (match: string) => `[bankaccount-${simpleHash(match)}]`; + +// Regular expressions for different types of PII +const patterns = { + email: { regex: /\S+@\S+\.\S+/g, mask: emailMask }, + phone: { regex: /\b\d{3}-\d{3}-\d{4}\b/g, mask: phoneMask }, + name: { regex: /\b[A-Z][a-z]+ [A-Z][a-z]+\b/g, mask: nameMask }, + ssn: { regex: /\b\d{3}-\d{2}-\d{4}\b/g, mask: ssnMask }, + creditCard: { regex: /\b(?:\d{4}[ -]?){3}\d{4}\b/g, mask: creditCardMask }, + passport: { regex: /(?i)\b[A-Z]{1,2}\d{6,9}\b/g, mask: passportMask }, + license: { regex: /(?i)\b[A-Z]{1,2}\d{6,8}\b/g, mask: licenseMask }, + address: { regex: /\b\d{1,5}\s[A-Z][a-z]+(?:\s[A-Z][a-z]+)\*\b/g, mask: addressMask, - }, - dob: { regex: /\b\d{4}-\d{2}-\d{2}\b/g, mask: dobMask }, - bankAccount: { regex: /\b\d{8,17}\b/g, mask: bankAccountMask }, - }; - - // Create a RegexMaskingTransformer with multiple patterns - const piiMaskingTransformer = new RegexMaskingTransformer(patterns); - - // Hooks for different stages of masking and rehydrating - const onMaskingStart = (message: string) => - console.log(`Starting to mask message: ${message}`); - const onMaskingEnd = (maskedMessage: string) => - console.log(`Masked message: ${maskedMessage}`); - const onRehydratingStart = (message: string) => - console.log(`Starting to rehydrate message: ${message}`); - const onRehydratingEnd = (rehydratedMessage: string) => - console.log(`Rehydrated message: ${rehydratedMessage}`); - - // Initialize MaskingParser with the transformer and hooks - const maskingParser = new MaskingParser({ - transformers: [piiMaskingTransformer], - onMaskingStart, - onMaskingEnd, - onRehydratingStart, - onRehydratingEnd, - }); - - // Example message containing multiple types of PII - const message = - "Contact Jane Doe at jane.doe@email.com or 555-123-4567. Her SSN is 123-45-6789 and her credit card number is 1234-5678-9012-3456. Passport number: AB1234567, Driver's License: X1234567, Address: 123 Main St, Date of Birth: 1990-01-01, Bank Account: 12345678901234567."; - - // Mask and rehydrate the message - maskingParser - .parse(message) - .then((maskedMessage: string) => { + }, + dob: { regex: /\b\d{4}-\d{2}-\d{2}\b/g, mask: dobMask }, + bankAccount: { regex: /\b\d{8,17}\b/g, mask: bankAccountMask }, +}; + +// Create a RegexMaskingTransformer with multiple patterns +const piiMaskingTransformer = new RegexMaskingTransformer(patterns); + +// Hooks for different stages of masking and rehydrating +const onMaskingStart = (message: string) => + console.log(`Starting to mask message: ${message}`); +const onMaskingEnd = (maskedMessage: string) => + console.log(`Masked message: ${maskedMessage}`); +const onRehydratingStart = (message: string) => + console.log(`Starting to rehydrate message: ${message}`); +const onRehydratingEnd = (rehydratedMessage: string) => + console.log(`Rehydrated message: ${rehydratedMessage}`); + +// Initialize MaskingParser with the transformer and hooks +const maskingParser = new MaskingParser({ + transformers: [piiMaskingTransformer], + onMaskingStart, + onMaskingEnd, + onRehydratingStart, + onRehydratingEnd, +}); + +// Example message containing multiple types of PII +const message = + "Contact Jane Doe at jane.doe@email.com or 555-123-4567. Her SSN is 123-45-6789 and her credit card number is 1234-5678-9012-3456. Passport number: AB1234567, Driver's License: X1234567, Address: 123 Main St, Date of Birth: 1990-01-01, Bank Account: 12345678901234567."; + +// Mask and rehydrate the message +maskingParser + .parse(message) + .then((maskedMessage: string) => { console.log(`Masked message: ${maskedMessage}`); return maskingParser.rehydrate(maskedMessage); - }) - .then((rehydratedMessage: string) => { + }) + .then((rehydratedMessage: string) => { console.log(`Final rehydrated message: ${rehydratedMessage}`); - }); \ No newline at end of file + }); diff --git a/examples/src/experimental/masking/next.ts b/examples/src/experimental/masking/next.ts new file mode 100644 index 000000000000..7be6b77edbcf --- /dev/null +++ b/examples/src/experimental/masking/next.ts @@ -0,0 +1,73 @@ +// app/api/chat + +import { + MaskingParser, + RegexMaskingTransformer, +} from "langchain/experimental/masking"; +import { PromptTemplate } from "langchain/prompts"; +import { ChatOpenAI } from "langchain/chat_models/openai"; +import { BytesOutputParser } from "langchain/schema/output_parser"; + +export const runtime = "edge"; + +// Function to format chat messages for consistency +const formatMessage = (message: any) => { + return `${message.role}: ${message.content}`; +}; + +const CUSTOMER_SUPPORT = `You are a customer support summarizer agent. Always include masked PII in your response. + Current conversation: + {chat_history} + User: {input} + AI:`; + +// Configure Masking Parser +const maskingParser = new MaskingParser(); +// Define transformations for masking emails and phone numbers using regular expressions +const piiMaskingTransformer = new RegexMaskingTransformer({ + email: { regex: /\S+@\S+\.\S+/g }, // If a regex is provided without a mask we fallback to a simple default hashing function + phone: { regex: /\d{3}-\d{3}-\d{4}/g }, +}); + +maskingParser.addTransformer(piiMaskingTransformer); + +export async function POST(req: Request) { + try { + const body = await req.json(); + const messages = body.messages ?? []; + const formattedPreviousMessages = messages.slice(0, -1).map(formatMessage); + const currentMessageContent = messages[messages.length - 1].content; // Extract the content of the last message + // Mask sensitive information in the current message + const guardedMessageContent = await maskingParser.parse( + currentMessageContent + ); + // Mask sensitive information in the chat history + const guardedHistory = await maskingParser.parse( + formattedPreviousMessages.join("\n") + ); + + const prompt = PromptTemplate.fromTemplate(CUSTOMER_SUPPORT); + const model = new ChatOpenAI({ temperature: 0.8 }); + // Initialize an output parser that handles serialization and byte-encoding for streaming + const outputParser = new BytesOutputParser(); + const chain = prompt.pipe(model).pipe(outputParser); // Chain the prompt, model, and output parser together + + console.log("[GUARDED INPUT]", guardedMessageContent); // Contact me at -1157967895 or -1626926859. + console.log("[GUARDED HISTORY]", guardedHistory); // user: Contact me at -1157967895 or -1626926859. assistant: Thank you for providing your contact information. + console.log("[STATE]", maskingParser.getState()); // { '-1157967895' => 'jane.doe@email.com', '-1626926859' => '555-123-4567'} + + // Stream the AI response based on the masked chat history and current message + const stream = await chain.stream({ + chat_history: guardedHistory, + input: guardedMessageContent, + }); + + // npm i ai + // import { StreamingTextResponse } from "ai"; + // return new StreamingTextResponse(stream); + + return; + } catch (e: any) { + return Response.json({ error: e.message }, { status: 500 }); + } +} diff --git a/langchain/src/experimental/masking/tests/masking.test.ts b/langchain/src/experimental/masking/tests/masking.test.ts index e8e103540272..9eec3f7cdb7a 100644 --- a/langchain/src/experimental/masking/tests/masking.test.ts +++ b/langchain/src/experimental/masking/tests/masking.test.ts @@ -68,10 +68,8 @@ describe("MaskingParser and PIIMaskingTransformer", () => { describe("Masking with Dynamic Identifiers", () => { let maskingParser: MaskingParser; let piiMaskingTransformer: RegexMaskingTransformer; - const emailMask = (match: string) => - `[email-${Math.random().toString(16).slice(2)}]`; - const phoneMask = (match: string) => - `[phone-${Math.random().toString(16).slice(2)}]`; + const emailMask = () => `[email-${Math.random().toString(16).slice(2)}]`; + const phoneMask = () => `[phone-${Math.random().toString(16).slice(2)}]`; beforeEach(() => { piiMaskingTransformer = new RegexMaskingTransformer({ @@ -178,7 +176,7 @@ describe("MaskingParser and PIIMaskingTransformer", () => { // This is just for demonstration purposes and not a secure hashing method. return input .split("") - .map((char) => "*") + .map(() => "*") .join(""); }; it("should mask email and phone using custom hash function", async () => { From ed9bc4090eba6c8a088093f0f4b5fc22d8be41c7 Mon Sep 17 00:00:00 2001 From: Jordan Date: Tue, 5 Dec 2023 14:02:06 -0500 Subject: [PATCH 06/14] fix: wording --- docs/core_docs/docs/modules/index.mdx | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/core_docs/docs/modules/index.mdx b/docs/core_docs/docs/modules/index.mdx index 8e8bd58c0e91..fe45f86187ec 100644 --- a/docs/core_docs/docs/modules/index.mdx +++ b/docs/core_docs/docs/modules/index.mdx @@ -32,4 +32,4 @@ Log and stream intermediate steps of any chain #### [Experimental](/docs/modules/experimental/) -Experimental modules not yet ready for production consumption +Experimental modules whose abstractions have not fully settled From 43137ec9302cf990ea1c0368da392f5dd405afea Mon Sep 17 00:00:00 2001 From: Jordan Date: Tue, 5 Dec 2023 14:02:43 -0500 Subject: [PATCH 07/14] docs: add basic example --- .../docs/modules/experimental/mask/mask.mdx | 46 ++++--------------- examples/src/experimental/masking/basic.ts | 30 ++++++++++++ 2 files changed, 38 insertions(+), 38 deletions(-) create mode 100644 examples/src/experimental/masking/basic.ts diff --git a/docs/core_docs/docs/modules/experimental/mask/mask.mdx b/docs/core_docs/docs/modules/experimental/mask/mask.mdx index d1ffd31da0d8..0ee3db205c7e 100644 --- a/docs/core_docs/docs/modules/experimental/mask/mask.mdx +++ b/docs/core_docs/docs/modules/experimental/mask/mask.mdx @@ -6,53 +6,23 @@ The experimental masking parser and transformer is an extendable module for mask A customer support system receives messages containing sensitive customer information. The system must parse these messages, mask any PII (like names, email addresses, and phone numbers), and log them for analysis while complying with privacy regulations. Before logging the transcript a summary is generated using an llm. -## Example - -Use the RegexMaskingTransformer to create a simple mask for email and phone. - -```typescript -import { - MaskingParser, - RegexMaskingTransformer, -} from "langchain/experimental/masking"; - -// Define masking strategy -const emailMask = () => `[email-${Math.random().toString(16).slice(2)}]`; -const phoneMask = () => `[phone-${Math.random().toString(16).slice(2)}]`; - -// Configure pii transformer -const piiMaskingTransformer = new RegexMaskingTransformer({ - email: { regex: /\S+@\S+\.\S+/g, mask: emailMask }, - phone: { regex: /\d{3}-\d{3}-\d{4}/g, mask: phoneMask }, -}); +## Get started -maskingParser = new MaskingParser(); -maskingParser.addTransformer(piiMaskingTransformer); +import CodeBlock from "@theme/CodeBlock"; +import ExampleBasic from "@examples/chains/llm_chain_stream.ts"; +import ExampleNext from "@examples/experimental/masking/next.ts"; +import ExampleKitchenSink from "@examples/experimental/masking/kitchen_sink.ts"; -const input = - "Contact me at jane.doe@email.com or 555-123-4567. Also reach me at john.smith@email.com"; -const masked = await maskingParser.parse(message); +### Basic Example -console.log(masked); -// Contact me at [email-a31e486e324f6] or [phone-da8fc1584f224]. Also reach me at [email-d5b6237633d95] +Use the RegexMaskingTransformer to create a simple mask for email and phone. -const rehydrated = maskingParser.rehydrate(masked); -console.log(rehydrated); -// Contact me at jane.doe@email.com or 555-123-4567. Also reach me at john.smith@email.com -``` +{ExampleBasic} :::note If you plan on storing the masking state to rehydrate the original values asynchronously ensure you are following best security practices. In most cases you will want to define a custom hashing and salting strategy. ::: -## Get started - -import CodeBlock from "@theme/CodeBlock"; -import ExampleKitchenSink from "@examples/experimental/masking/kitchen_sink.ts"; -import ExampleNext from "@examples/experimental/masking/next.ts"; -import ExampleStream from "@examples/chains/llm_chain_stream.ts"; -import ExampleCancellation from "@examples/chains/llm_chain_cancellation.ts"; - ### Next.js stream Example nextjs chat endpoint leveraging the RegexMaskingTransformer. The current chat message and chat message history are masked every time the api is called with a chat payload. diff --git a/examples/src/experimental/masking/basic.ts b/examples/src/experimental/masking/basic.ts new file mode 100644 index 000000000000..77a5c78de2b2 --- /dev/null +++ b/examples/src/experimental/masking/basic.ts @@ -0,0 +1,30 @@ +import { + MaskingParser, + RegexMaskingTransformer, +} from "langchain/experimental/masking"; + +// Define masking strategy +const emailMask = () => `[email-${Math.random().toString(16).slice(2)}]`; +const phoneMask = () => `[phone-${Math.random().toString(16).slice(2)}]`; + +// Configure pii transformer +const piiMaskingTransformer = new RegexMaskingTransformer({ + email: { regex: /\S+@\S+\.\S+/g, mask: emailMask }, + phone: { regex: /\d{3}-\d{3}-\d{4}/g, mask: phoneMask }, +}); + +const maskingParser = new MaskingParser({ + transformers: [piiMaskingTransformer], +}); +maskingParser.addTransformer(piiMaskingTransformer); + +const input = + "Contact me at jane.doe@email.com or 555-123-4567. Also reach me at john.smith@email.com"; +const masked = await maskingParser.parse(input); + +console.log(masked); +// Contact me at [email-a31e486e324f6] or [phone-da8fc1584f224]. Also reach me at [email-d5b6237633d95] + +const rehydrated = maskingParser.rehydrate(masked); +console.log(rehydrated); +// Contact me at jane.doe@email.com or 555-123-4567. Also reach me at john.smith@email.com From ce1a3e96432f2b0e757496a5c0cfbaa1de08edd0 Mon Sep 17 00:00:00 2001 From: Jordan Date: Tue, 5 Dec 2023 14:03:21 -0500 Subject: [PATCH 08/14] fix: remove comment and return stream --- examples/src/experimental/masking/next.ts | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/examples/src/experimental/masking/next.ts b/examples/src/experimental/masking/next.ts index 7be6b77edbcf..41e587396ba2 100644 --- a/examples/src/experimental/masking/next.ts +++ b/examples/src/experimental/masking/next.ts @@ -62,11 +62,9 @@ export async function POST(req: Request) { input: guardedMessageContent, }); - // npm i ai - // import { StreamingTextResponse } from "ai"; - // return new StreamingTextResponse(stream); - - return; + return new Response(stream, { + headers: { "content-type": "text/plain; charset=utf-8" }, + }); } catch (e: any) { return Response.json({ error: e.message }, { status: 500 }); } From 81f8557611e98e7f6faa8000e103f7d409da7f1b Mon Sep 17 00:00:00 2001 From: Jordan Date: Tue, 5 Dec 2023 19:20:35 -0500 Subject: [PATCH 09/14] feat: async hooks, immutable parser state --- langchain/src/experimental/masking/index.ts | 1 + langchain/src/experimental/masking/parser.ts | 89 +++++--- .../masking/regex_masking_transformer.ts | 31 ++- .../masking/tests/masking.test.ts | 207 +++++++++++++++++- .../src/experimental/masking/transformer.ts | 7 +- langchain/src/load/import_map.ts | 1 + 6 files changed, 283 insertions(+), 53 deletions(-) diff --git a/langchain/src/experimental/masking/index.ts b/langchain/src/experimental/masking/index.ts index d8b07444e1d5..6aca47bb0581 100644 --- a/langchain/src/experimental/masking/index.ts +++ b/langchain/src/experimental/masking/index.ts @@ -1,5 +1,6 @@ export { MaskingParser } from "./parser.js"; export { RegexMaskingTransformer } from "./regex_masking_transformer.js"; +export { MaskingTransformer } from "./transformer.js"; export { type MaskingParserConfig, type HashFunction, diff --git a/langchain/src/experimental/masking/parser.ts b/langchain/src/experimental/masking/parser.ts index 197685d67dd5..7a380a8cccdf 100644 --- a/langchain/src/experimental/masking/parser.ts +++ b/langchain/src/experimental/masking/parser.ts @@ -42,7 +42,17 @@ export class MaskingParser { * @throws {Error} If no transformers are added. */ async parse(message: string): Promise { - this.config.onMaskingStart?.(message); + // If onMaskingStart is a function, handle it accordingly + if (this.config.onMaskingStart) { + try { + const result = this.config.onMaskingStart(message) as any; + if (result && typeof result.then === "function") { + await result; + } + } catch (error) { + throw error; // Re-throw the error + } + } // Check if there are any transformers added to the parser. If not, throw an error // as masking requires at least one transformer to apply its logic. @@ -62,19 +72,31 @@ export class MaskingParser { // It starts as the original message and gets transformed by each transformer. let processedMessage = message; - // Iterate through each transformer added to the parser. - this.transformers.forEach((transformer) => { - // Apply the transformer's transform method to the current state of the message. - // The transform method returns a tuple containing the updated message and state. - // The state is a map that tracks the original values of masked content. - // This state is essential for the rehydration process to restore the original message. - [processedMessage, this.state] = transformer.transform( - processedMessage, - this.state - ); - }); + // Iterate through each transformer and apply their transform method. + for (const transformer of this.transformers) { + // Transform the message and get the transformer's state changes, ensuring no direct mutation of the shared state. + const [transformedMessage, transformerState] = + await transformer.transform(processedMessage, new Map(this.state)); + + // Update the processed message for subsequent transformers. + processedMessage = transformedMessage; - this.config.onMaskingEnd?.(processedMessage); + // Merge state changes from the transformer into the parser's state. + // This accumulates all transformations' effects on the state. + transformerState.forEach((value, key) => this.state.set(key, value)); + } + + // Handle onMaskingEnd callback + if (this.config.onMaskingEnd) { + try { + const result = this.config.onMaskingEnd(processedMessage) as any; // Type assertion + if (result && typeof result.then === "function") { + await result; + } + } catch (error) { + throw error; // Re-throw the error + } + } // Return the fully masked message after all transformers have been applied. return processedMessage; } @@ -94,7 +116,14 @@ export class MaskingParser { message: string, state?: Map ): Promise { - this.config.onRehydratingStart?.(message); + // Handle onRehydratingStart callback + if (this.config.onRehydratingStart) { + try { + await this.config.onRehydratingStart(message); + } catch (error) { + throw error; // Re-throw the error + } + } if (typeof message !== "string") { throw new TypeError( @@ -109,6 +138,7 @@ export class MaskingParser { ); } + // eslint-disable-next-line no-instanceof/no-instanceof if (state && !(state instanceof Map)) { throw new TypeError( "MaskingParser.rehydrate Error: The 'state' argument, if provided, must be an instance of Map." @@ -119,20 +149,23 @@ export class MaskingParser { // Initialize the rehydratedMessage with the input masked message. // This variable will undergo rehydration by each transformer in reverse order. let rehydratedMessage = message; - this.transformers - .slice() - .reverse() - .forEach((transformer) => { - // Apply the transformer's rehydrate method to the current state of the message. - // The rehydrate method uses the stored state (this.state) to map masked values - // back to their original values, effectively undoing the masking transformation. - rehydratedMessage = transformer.rehydrate( - rehydratedMessage, - rehydrationState - ); - }); - - this.config.onRehydratingEnd?.(rehydratedMessage); + // Use a reverse for...of loop to accommodate asynchronous rehydrate methods + const reversedTransformers = this.transformers.slice().reverse(); + for (const transformer of reversedTransformers) { + // Check if the result is a Promise and use await, otherwise use it directly + const result = transformer.rehydrate(rehydratedMessage, rehydrationState); + rehydratedMessage = result instanceof Promise ? await result : result; + } + + // Handle onRehydratingEnd callback + if (this.config.onRehydratingEnd) { + try { + await this.config.onRehydratingEnd(rehydratedMessage); + } catch (error) { + throw error; // Re-throw the error + } + } + // Return the fully rehydrated message after all transformers have been applied. return rehydratedMessage; } diff --git a/langchain/src/experimental/masking/regex_masking_transformer.ts b/langchain/src/experimental/masking/regex_masking_transformer.ts index 775cfaa21275..029853ff2875 100644 --- a/langchain/src/experimental/masking/regex_masking_transformer.ts +++ b/langchain/src/experimental/masking/regex_masking_transformer.ts @@ -61,7 +61,7 @@ export class RegexMaskingTransformer extends MaskingTransformer { transform( message: string, state: Map - ): [string, Map] { + ): Promise<[string, Map]> { if (typeof message !== "string") { throw new TypeError( "RegexMaskingTransformer.transform Error: The 'message' argument must be a string." @@ -101,7 +101,9 @@ export class RegexMaskingTransformer extends MaskingTransformer { } // Return the fully masked message and the state map with all original values - return [processedMessage, originalValues]; + // Wrap the synchronous return values in Promise.resolve() to maintain compatibility + // with the MaskingParser's expectation of a Promise return type. + return Promise.resolve([processedMessage, originalValues]); } /** @@ -110,7 +112,7 @@ export class RegexMaskingTransformer extends MaskingTransformer { * @param state - The state map containing mappings of masked values to their original values. * @returns The rehydrated (original) message. */ - rehydrate(message: string, state: Map): string { + rehydrate(message: string, state: Map): Promise { if (typeof message !== "string") { throw new TypeError( "RegexMaskingTransformer.rehydrate Error: The 'message' argument must be a string." @@ -124,15 +126,20 @@ export class RegexMaskingTransformer extends MaskingTransformer { } // Convert the state map to an array and use reduce to sequentially replace masked values with original values. - return Array.from(state).reduce((msg, [masked, original]) => { - // Escape special characters in the masked string to ensure it can be used in a regular expression safely. - // This is necessary because masked values might contain characters that have special meanings in regex. - const escapedMasked = masked.replace(/[.*+?^${}()|[\]\\]/g, "\\$&"); - - // Replace all instances of the escaped masked value in the message with the original value. - // The 'g' flag in the RegExp ensures that all occurrences of the masked value are replaced. - return msg.replace(new RegExp(escapedMasked, "g"), original); - }, message); + const rehydratedMessage = Array.from(state).reduce( + (msg, [masked, original]) => { + // Escape special characters in the masked string to ensure it can be used in a regular expression safely. + // This is necessary because masked values might contain characters that have special meanings in regex. + const escapedMasked = masked.replace(/[.*+?^${}()|[\]\\]/g, "\\$&"); + + // Replace all instances of the escaped masked value in the message with the original value. + // The 'g' flag in the RegExp ensures that all occurrences of the masked value are replaced. + return msg.replace(new RegExp(escapedMasked, "g"), original); + }, + message + ); + + return Promise.resolve(rehydratedMessage); } /** diff --git a/langchain/src/experimental/masking/tests/masking.test.ts b/langchain/src/experimental/masking/tests/masking.test.ts index 9eec3f7cdb7a..de805e43bba5 100644 --- a/langchain/src/experimental/masking/tests/masking.test.ts +++ b/langchain/src/experimental/masking/tests/masking.test.ts @@ -1,5 +1,9 @@ // yarn test:single src/experimental/masking/tests/masking.test.ts -import { MaskingParser, RegexMaskingTransformer } from "../index.js"; +import { + MaskingParser, + RegexMaskingTransformer, + MaskingTransformer, +} from "../index.js"; import { jest } from "@jest/globals"; describe("MaskingParser and PIIMaskingTransformer", () => { @@ -313,9 +317,10 @@ describe("MaskingParser and PIIMaskingTransformer", () => { }); }); - it("calls onMaskingStart and onMaskingEnd hooks during parse", async () => { - const onMaskingStart = jest.fn(); - const onMaskingEnd = jest.fn(); + // Masking hooks + it("handles synchronous onMaskingStart and onMaskingEnd hooks during parse", async () => { + const onMaskingStart = jest.fn(); // Synchronous mock + const onMaskingEnd = jest.fn(); // Synchronous mock maskingParser = new MaskingParser({ transformers: [piiMaskingTransformer], @@ -330,9 +335,69 @@ describe("MaskingParser and PIIMaskingTransformer", () => { expect(onMaskingEnd).toHaveBeenCalled(); }); - it("calls onRehydratingStart and onRehydratingEnd hooks during rehydrate", async () => { - const onRehydratingStart = jest.fn(); - const onRehydratingEnd = jest.fn(); + it("handles asynchronous onMaskingStart and onMaskingEnd hooks during parse", async () => { + const onMaskingStart = jest.fn(() => Promise.resolve()); // Correctly mocked as an async function + const onMaskingEnd = jest.fn(() => Promise.resolve()); // Correctly mocked as an async function + + maskingParser = new MaskingParser({ + transformers: [piiMaskingTransformer], + onMaskingStart, + onMaskingEnd, + }); + + const message = "Contact me at jane.doe@email.com"; + await maskingParser.parse(message); + + expect(onMaskingStart).toHaveBeenCalledWith(message); + expect(onMaskingEnd).toHaveBeenCalled(); + }); + + it("handles errors in synchronous onMaskingStart and onMaskingEnd hooks during parse", async () => { + const error = new Error("Test Error"); + const onMaskingStart = jest.fn(() => { + throw error; + }); // Synchronous mock that throws an error + const onMaskingEnd = jest.fn(() => { + throw error; + }); // Synchronous mock that throws an error + + maskingParser = new MaskingParser({ + transformers: [piiMaskingTransformer], + onMaskingStart, + onMaskingEnd, + }); + + const message = "Contact me at jane.doe@email.com"; + await expect(maskingParser.parse(message)).rejects.toThrow(error); + + expect(onMaskingStart).toHaveBeenCalledWith(message); + // onMaskingEnd should not be called because an error is thrown in onMaskingStart + expect(onMaskingEnd).not.toHaveBeenCalled(); + }); + + it("handles errors in asynchronous onMaskingStart and onMaskingEnd hooks during parse", async () => { + const error = new Error("Test Error"); + const onMaskingStart = jest.fn(() => Promise.reject(error)); // Asynchronous mock that rejects with an error + const onMaskingEnd = jest.fn(() => Promise.reject(error)); // Asynchronous mock that rejects with an error + + maskingParser = new MaskingParser({ + transformers: [piiMaskingTransformer], + onMaskingStart, + onMaskingEnd, + }); + + const message = "Contact me at jane.doe@email.com"; + await expect(maskingParser.parse(message)).rejects.toThrow(error); + + expect(onMaskingStart).toHaveBeenCalledWith(message); + // onMaskingEnd should not be called because an error is thrown in onMaskingStart + expect(onMaskingEnd).not.toHaveBeenCalled(); + }); + + // Rehydration hooks + it("handles synchronous onRehydratingStart and onRehydratingEnd hooks during rehydrate", async () => { + const onRehydratingStart = jest.fn(); // Synchronous mock + const onRehydratingEnd = jest.fn(); // Synchronous mock maskingParser = new MaskingParser({ transformers: [piiMaskingTransformer], @@ -340,12 +405,132 @@ describe("MaskingParser and PIIMaskingTransformer", () => { onRehydratingEnd, }); - const message = "Contact me at [email]"; - await maskingParser.parse(message); // necessary to populate the state - await maskingParser.rehydrate(message); + const maskedMessage = await maskingParser.parse( + "Contact me at jane.doe@email.com" + ); + await maskingParser.rehydrate(maskedMessage); - expect(onRehydratingStart).toHaveBeenCalledWith(message); + expect(onRehydratingStart).toHaveBeenCalledWith(maskedMessage); expect(onRehydratingEnd).toHaveBeenCalled(); }); + + it("handles asynchronous onRehydratingStart and onRehydratingEnd hooks during rehydrate", async () => { + const onRehydratingStart = jest.fn(() => Promise.resolve()); // Asynchronous mock + const onRehydratingEnd = jest.fn(() => Promise.resolve()); // Asynchronous mock + + maskingParser = new MaskingParser({ + transformers: [piiMaskingTransformer], + onRehydratingStart, + onRehydratingEnd, + }); + + const maskedMessage = await maskingParser.parse( + "Contact me at jane.doe@email.com" + ); + await maskingParser.rehydrate(maskedMessage); + + expect(onRehydratingStart).toHaveBeenCalledWith(maskedMessage); + expect(onRehydratingEnd).toHaveBeenCalled(); + }); + + it("handles errors in synchronous onRehydratingStart and onRehydratingEnd hooks during rehydrate", async () => { + const error = new Error("Test Error"); + const onRehydratingStart = jest.fn(() => { + throw error; + }); // Synchronous mock that throws an error + const onRehydratingEnd = jest.fn(() => { + throw error; + }); // Synchronous mock that throws an error + + maskingParser = new MaskingParser({ + transformers: [piiMaskingTransformer], + onRehydratingStart, + onRehydratingEnd, + }); + + const maskedMessage = await maskingParser.parse( + "Contact me at jane.doe@email.com" + ); + await expect(maskingParser.rehydrate(maskedMessage)).rejects.toThrow( + error + ); + + expect(onRehydratingStart).toHaveBeenCalledWith(maskedMessage); + // onRehydratingEnd should not be called because an error is thrown in onRehydratingStart + expect(onRehydratingEnd).not.toHaveBeenCalled(); + }); + + it("handles errors in asynchronous onRehydratingStart and onRehydratingEnd hooks during rehydrate", async () => { + const error = new Error("Test Error"); + const onRehydratingStart = jest.fn(() => Promise.reject(error)); // Asynchronous mock that rejects with an error + const onRehydratingEnd = jest.fn(() => Promise.reject(error)); // Asynchronous mock that rejects with an error + + maskingParser = new MaskingParser({ + transformers: [piiMaskingTransformer], + onRehydratingStart, + onRehydratingEnd, + }); + + const maskedMessage = await maskingParser.parse( + "Contact me at jane.doe@email.com" + ); + await expect(maskingParser.rehydrate(maskedMessage)).rejects.toThrow( + error + ); + + expect(onRehydratingStart).toHaveBeenCalledWith(maskedMessage); + // onRehydratingEnd should not be called because an error is thrown in onRehydratingStart + expect(onRehydratingEnd).not.toHaveBeenCalled(); + }); + }); + + describe("MaskingParser with Asynchronous Transformers", () => { + let maskingParser: MaskingParser; + let asyncTransformer: MaskingTransformer; + + beforeEach(() => { + // Mock an asynchronous transformer + asyncTransformer = { + async transform(message, state) { + // Simulate an asynchronous operation + await new Promise((resolve) => setTimeout(resolve, 100)); + // Return transformed message and updated state + const transformedMessage = message.replace( + /sensitiveData/g, + "[REDACTED]" + ); + const newState = new Map(state).set( + "redacted", + "sensitive string :(" + ); + return [transformedMessage, newState]; + }, + // Mock or placeholder rehydrate method + rehydrate(message, state) { + return Promise.resolve(message); + }, + }; + + maskingParser = new MaskingParser({ + transformers: [asyncTransformer], + // Add other configurations if necessary + }); + }); + + it("properly handles asynchronous transformations and state updates", async () => { + const originalMessage = + "This message contains sensitiveData that should be redacted."; + const transformedMessage = await maskingParser.parse(originalMessage); + + // Check if the message is transformed correctly + expect(transformedMessage).toBe( + "This message contains [REDACTED] that should be redacted." + ); + + // Check if the state is updated correctly + expect(maskingParser.getState().get("redacted")).toBe( + "sensitive string :(" + ); + }); }); }); diff --git a/langchain/src/experimental/masking/transformer.ts b/langchain/src/experimental/masking/transformer.ts index 2ae89cb856d3..a6d42bea00c4 100644 --- a/langchain/src/experimental/masking/transformer.ts +++ b/langchain/src/experimental/masking/transformer.ts @@ -5,6 +5,9 @@ export abstract class MaskingTransformer { abstract transform( message: string, state?: Map - ): [string, Map]; - abstract rehydrate(message: string, state: Map): string; + ): Promise<[string, Map]>; + abstract rehydrate( + message: string, + state: Map + ): Promise; } diff --git a/langchain/src/load/import_map.ts b/langchain/src/load/import_map.ts index a1d511062784..da7e24778135 100644 --- a/langchain/src/load/import_map.ts +++ b/langchain/src/load/import_map.ts @@ -101,6 +101,7 @@ export * as experimental__plan_and_execute from "../experimental/plan_and_execut export * as experimental__chat_models__bittensor from "../experimental/chat_models/bittensor.js"; export * as experimental__chat_models__ollama_functions from "../experimental/chat_models/ollama_functions.js"; export * as experimental__chains__violation_of_expectations from "../experimental/chains/violation_of_expectations/index.js"; +export * as experimental__masking from "../experimental/masking/index.js"; export * as evaluation from "../evaluation/index.js"; export * as runnables from "../runnables/index.js"; export * as runnables__remote from "../runnables/remote.js"; From 94738cc2f5ebb981fa3f6e7251f3e8d62a21cff7 Mon Sep 17 00:00:00 2001 From: Jordan Date: Tue, 5 Dec 2023 19:26:00 -0500 Subject: [PATCH 10/14] fix: parse -> mask --- langchain/src/experimental/masking/parser.ts | 6 +-- .../masking/tests/mask-integration.test.ts | 2 +- .../masking/tests/masking.test.ts | 42 +++++++++---------- 3 files changed, 25 insertions(+), 25 deletions(-) diff --git a/langchain/src/experimental/masking/parser.ts b/langchain/src/experimental/masking/parser.ts index 7a380a8cccdf..ec794350d79e 100644 --- a/langchain/src/experimental/masking/parser.ts +++ b/langchain/src/experimental/masking/parser.ts @@ -41,7 +41,7 @@ export class MaskingParser { * @throws {TypeError} If the message is not a string. * @throws {Error} If no transformers are added. */ - async parse(message: string): Promise { + async mask(message: string): Promise { // If onMaskingStart is a function, handle it accordingly if (this.config.onMaskingStart) { try { @@ -58,13 +58,13 @@ export class MaskingParser { // as masking requires at least one transformer to apply its logic. if (this.transformers.length === 0) { throw new Error( - "MaskingParser.parse Error: No transformers have been added. Please add at least one transformer before parsing." + "MaskingParser.mask Error: No transformers have been added. Please add at least one transformer before parsing." ); } if (typeof message !== "string") { throw new TypeError( - "MaskingParser.parse Error: The 'message' argument must be a string." + "MaskingParser.mask Error: The 'message' argument must be a string." ); } diff --git a/langchain/src/experimental/masking/tests/mask-integration.test.ts b/langchain/src/experimental/masking/tests/mask-integration.test.ts index d0b80d1738da..cb8a6bf20b17 100644 --- a/langchain/src/experimental/masking/tests/mask-integration.test.ts +++ b/langchain/src/experimental/masking/tests/mask-integration.test.ts @@ -40,7 +40,7 @@ describe("MaskingParser Integration Test", () => { it("should mask, store state, and rehydrate with altered order", async () => { const originalMessage = "Contact me at jane.doe@email.com or 555-123-4567."; - const maskedMessage = await parser.parse(originalMessage); + const maskedMessage = await parser.mask(originalMessage); // Serialize and store the state const serializedState = serializeState(parser.getState()); diff --git a/langchain/src/experimental/masking/tests/masking.test.ts b/langchain/src/experimental/masking/tests/masking.test.ts index de805e43bba5..96bdc7efc1ea 100644 --- a/langchain/src/experimental/masking/tests/masking.test.ts +++ b/langchain/src/experimental/masking/tests/masking.test.ts @@ -27,7 +27,7 @@ describe("MaskingParser and PIIMaskingTransformer", () => { const message = "Contact me at jane.doe@email.com or 555-123-4567."; const expectedMaskedMessage = "Contact me at [email] or [phone]."; - const maskedMessage = await maskingParser.parse(message); + const maskedMessage = await maskingParser.mask(message); expect(maskedMessage).toBe(expectedMaskedMessage); }); @@ -37,7 +37,7 @@ describe("MaskingParser and PIIMaskingTransformer", () => { const expectedOriginalMessage = "Contact me at jane.doe@email.com or 555-123-4567."; - await maskingParser.parse(expectedOriginalMessage); // Masking original message + await maskingParser.mask(expectedOriginalMessage); // Masking original message const rehydratedMessage = await maskingParser.rehydrate(maskedMessage); expect(rehydratedMessage).toBe(expectedOriginalMessage); @@ -58,7 +58,7 @@ describe("MaskingParser and PIIMaskingTransformer", () => { it("efficiently processes large data sets", async () => { const largeMessage = generateLargeMessage(); const startTime = performance.now(); - const maskedMessage = await maskingParser.parse(largeMessage); + const maskedMessage = await maskingParser.mask(largeMessage); const endTime = performance.now(); const someAcceptableDuration = 5000; // Set this to a duration you consider acceptable, e.g., 5000 milliseconds (5 seconds) @@ -88,7 +88,7 @@ describe("MaskingParser and PIIMaskingTransformer", () => { it("masks multiple occurrences of different PII with unique identifiers", async () => { const message = "Contact me at jane.doe@email.com or 555-123-4567. Also reach me at john.smith@email.com"; - const maskedMessage = await maskingParser.parse(message); + const maskedMessage = await maskingParser.mask(message); expect(maskedMessage).toMatch(/\[email-[a-f0-9]+\]/g); expect(maskedMessage).toMatch(/\[phone-[a-f0-9]+\]/g); @@ -103,7 +103,7 @@ describe("MaskingParser and PIIMaskingTransformer", () => { it("rehydrates dynamic masked data to its original form", async () => { const originalMessage = "Contact me at jane.doe@email.com or 555-123-4567. Also reach me at john.smith@email.com"; - const maskedMessage = await maskingParser.parse(originalMessage); + const maskedMessage = await maskingParser.mask(originalMessage); const rehydratedMessage = await maskingParser.rehydrate(maskedMessage); expect(rehydratedMessage).toBe(originalMessage); @@ -112,7 +112,7 @@ describe("MaskingParser and PIIMaskingTransformer", () => { it("masks identical PII with consistent dynamic identifiers", async () => { const message = "Contact me at jane.doe@email.com or 555-123-4567. Also reach me at john.smith@email.com and 555-123-4567"; - const maskedMessage = await maskingParser.parse(message); + const maskedMessage = await maskingParser.mask(message); expect(maskedMessage).toMatch(/\[email-[a-f0-9]+\]/g); expect(maskedMessage).toMatch(/\[phone-[a-f0-9]+\]/g); @@ -151,7 +151,7 @@ describe("MaskingParser and PIIMaskingTransformer", () => { const message = "My email is jane.doe@email.com and phone is 555-123-4567."; - const maskedMessage = await maskingParser.parse(message); + const maskedMessage = await maskingParser.mask(message); expect(maskedMessage).toContain("[email]"); expect(maskedMessage).toContain("[phone]"); @@ -202,7 +202,7 @@ describe("MaskingParser and PIIMaskingTransformer", () => { maskingParser.addTransformer(piiMaskingTransformer); const message = "Contact me at jane.doe@email.com or 555-123-4567."; - const maskedMessage = await maskingParser.parse(message); + const maskedMessage = await maskingParser.mask(message); // The lengths of the masked parts should be equal to the lengths of the original email and phone number. const expectedEmailMask = @@ -233,7 +233,7 @@ describe("MaskingParser and PIIMaskingTransformer", () => { const originalMessage = "Contact me at jane.doe@email.com or 555-123-4567."; - const maskedMessage = await maskingParser.parse(originalMessage); + const maskedMessage = await maskingParser.mask(originalMessage); const rehydratedMessage = await maskingParser.rehydrate(maskedMessage); expect(rehydratedMessage).toBe(originalMessage); @@ -251,8 +251,8 @@ describe("MaskingParser and PIIMaskingTransformer", () => { it("throws an error when no transformers are added and parse is called", async () => { const message = "Some message"; - await expect(maskingParser.parse(message)).rejects.toThrow( - "MaskingParser.parse Error: No transformers have been added. Please add at least one transformer before parsing." + await expect(maskingParser.mask(message)).rejects.toThrow( + "MaskingParser.mask Error: No transformers have been added. Please add at least one transformer before parsing." ); }); @@ -266,7 +266,7 @@ describe("MaskingParser and PIIMaskingTransformer", () => { it("throws an error for invalid message type in parse", async () => { const invalidMessage: any = 123; // intentionally incorrect type maskingParser.addTransformer(piiMaskingTransformer); // Add a transformer - await expect(maskingParser.parse(invalidMessage)).rejects.toThrow( + await expect(maskingParser.mask(invalidMessage)).rejects.toThrow( "The 'message' argument must be a string." ); }); @@ -329,7 +329,7 @@ describe("MaskingParser and PIIMaskingTransformer", () => { }); const message = "Contact me at jane.doe@email.com"; - await maskingParser.parse(message); + await maskingParser.mask(message); expect(onMaskingStart).toHaveBeenCalledWith(message); expect(onMaskingEnd).toHaveBeenCalled(); @@ -346,7 +346,7 @@ describe("MaskingParser and PIIMaskingTransformer", () => { }); const message = "Contact me at jane.doe@email.com"; - await maskingParser.parse(message); + await maskingParser.mask(message); expect(onMaskingStart).toHaveBeenCalledWith(message); expect(onMaskingEnd).toHaveBeenCalled(); @@ -368,7 +368,7 @@ describe("MaskingParser and PIIMaskingTransformer", () => { }); const message = "Contact me at jane.doe@email.com"; - await expect(maskingParser.parse(message)).rejects.toThrow(error); + await expect(maskingParser.mask(message)).rejects.toThrow(error); expect(onMaskingStart).toHaveBeenCalledWith(message); // onMaskingEnd should not be called because an error is thrown in onMaskingStart @@ -387,7 +387,7 @@ describe("MaskingParser and PIIMaskingTransformer", () => { }); const message = "Contact me at jane.doe@email.com"; - await expect(maskingParser.parse(message)).rejects.toThrow(error); + await expect(maskingParser.mask(message)).rejects.toThrow(error); expect(onMaskingStart).toHaveBeenCalledWith(message); // onMaskingEnd should not be called because an error is thrown in onMaskingStart @@ -405,7 +405,7 @@ describe("MaskingParser and PIIMaskingTransformer", () => { onRehydratingEnd, }); - const maskedMessage = await maskingParser.parse( + const maskedMessage = await maskingParser.mask( "Contact me at jane.doe@email.com" ); await maskingParser.rehydrate(maskedMessage); @@ -424,7 +424,7 @@ describe("MaskingParser and PIIMaskingTransformer", () => { onRehydratingEnd, }); - const maskedMessage = await maskingParser.parse( + const maskedMessage = await maskingParser.mask( "Contact me at jane.doe@email.com" ); await maskingParser.rehydrate(maskedMessage); @@ -448,7 +448,7 @@ describe("MaskingParser and PIIMaskingTransformer", () => { onRehydratingEnd, }); - const maskedMessage = await maskingParser.parse( + const maskedMessage = await maskingParser.mask( "Contact me at jane.doe@email.com" ); await expect(maskingParser.rehydrate(maskedMessage)).rejects.toThrow( @@ -471,7 +471,7 @@ describe("MaskingParser and PIIMaskingTransformer", () => { onRehydratingEnd, }); - const maskedMessage = await maskingParser.parse( + const maskedMessage = await maskingParser.mask( "Contact me at jane.doe@email.com" ); await expect(maskingParser.rehydrate(maskedMessage)).rejects.toThrow( @@ -520,7 +520,7 @@ describe("MaskingParser and PIIMaskingTransformer", () => { it("properly handles asynchronous transformations and state updates", async () => { const originalMessage = "This message contains sensitiveData that should be redacted."; - const transformedMessage = await maskingParser.parse(originalMessage); + const transformedMessage = await maskingParser.mask(originalMessage); // Check if the message is transformed correctly expect(transformedMessage).toBe( From 827d51432da4d4cdef7ea6eb881f1126f03b6f59 Mon Sep 17 00:00:00 2001 From: Jordan Date: Tue, 5 Dec 2023 19:29:14 -0500 Subject: [PATCH 11/14] fix: || -> ?? --- langchain/src/experimental/masking/parser.ts | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/langchain/src/experimental/masking/parser.ts b/langchain/src/experimental/masking/parser.ts index ec794350d79e..dc6e15e19948 100644 --- a/langchain/src/experimental/masking/parser.ts +++ b/langchain/src/experimental/masking/parser.ts @@ -10,7 +10,7 @@ export class MaskingParser { private config: MaskingParserConfig; constructor(config: MaskingParserConfig = {}) { - this.transformers = config.transformers || []; + this.transformers = config.transformers ?? []; this.state = new Map(); this.config = config; } From 8e9360e73cc8261eec074c59c3f90603ad2949c4 Mon Sep 17 00:00:00 2001 From: jacoblee93 Date: Wed, 6 Dec 2023 14:15:31 -0800 Subject: [PATCH 12/14] Fix lint, style --- .../src/experimental/masking/kitchen_sink.ts | 2 +- examples/src/experimental/masking/next.ts | 4 +- langchain/src/experimental/masking/parser.ts | 40 ++++---------- .../masking/regex_masking_transformer.ts | 25 +++++---- ...ation.test.ts => masking-extended.test.ts} | 2 +- .../masking/tests/masking.test.ts | 52 +++++++++++-------- .../src/experimental/masking/transformer.ts | 1 + langchain/src/experimental/masking/types.ts | 4 +- 8 files changed, 63 insertions(+), 67 deletions(-) rename langchain/src/experimental/masking/tests/{mask-integration.test.ts => masking-extended.test.ts} (97%) diff --git a/examples/src/experimental/masking/kitchen_sink.ts b/examples/src/experimental/masking/kitchen_sink.ts index f242848e8139..07e85e7fc50f 100644 --- a/examples/src/experimental/masking/kitchen_sink.ts +++ b/examples/src/experimental/masking/kitchen_sink.ts @@ -6,7 +6,7 @@ import { // A simple hash function for demonstration purposes function simpleHash(input: string): string { let hash = 0; - for (let i = 0; i < input.length; i++) { + for (let i = 0; i < input.length; i += 1) { const char = input.charCodeAt(i); hash = (hash << 5) - hash + char; hash |= 0; // Convert to 32bit integer diff --git a/examples/src/experimental/masking/next.ts b/examples/src/experimental/masking/next.ts index 41e587396ba2..85621a4b8dca 100644 --- a/examples/src/experimental/masking/next.ts +++ b/examples/src/experimental/masking/next.ts @@ -11,9 +11,7 @@ import { BytesOutputParser } from "langchain/schema/output_parser"; export const runtime = "edge"; // Function to format chat messages for consistency -const formatMessage = (message: any) => { - return `${message.role}: ${message.content}`; -}; +const formatMessage = (message: any) => `${message.role}: ${message.content}`; const CUSTOMER_SUPPORT = `You are a customer support summarizer agent. Always include masked PII in your response. Current conversation: diff --git a/langchain/src/experimental/masking/parser.ts b/langchain/src/experimental/masking/parser.ts index dc6e15e19948..7592775d3e78 100644 --- a/langchain/src/experimental/masking/parser.ts +++ b/langchain/src/experimental/masking/parser.ts @@ -1,12 +1,14 @@ import { MaskingTransformer } from "./transformer.js"; -import { MaskingParserConfig } from "./types.js"; +import type { MaskingParserConfig } from "./types.js"; /** * MaskingParser class for handling the masking and rehydrating of messages. */ export class MaskingParser { private transformers: MaskingTransformer[]; + private state: Map; + private config: MaskingParserConfig; constructor(config: MaskingParserConfig = {}) { @@ -44,14 +46,7 @@ export class MaskingParser { async mask(message: string): Promise { // If onMaskingStart is a function, handle it accordingly if (this.config.onMaskingStart) { - try { - const result = this.config.onMaskingStart(message) as any; - if (result && typeof result.then === "function") { - await result; - } - } catch (error) { - throw error; // Re-throw the error - } + await this.config.onMaskingStart(message); } // Check if there are any transformers added to the parser. If not, throw an error @@ -88,14 +83,7 @@ export class MaskingParser { // Handle onMaskingEnd callback if (this.config.onMaskingEnd) { - try { - const result = this.config.onMaskingEnd(processedMessage) as any; // Type assertion - if (result && typeof result.then === "function") { - await result; - } - } catch (error) { - throw error; // Re-throw the error - } + await this.config.onMaskingEnd(processedMessage); } // Return the fully masked message after all transformers have been applied. return processedMessage; @@ -118,11 +106,7 @@ export class MaskingParser { ): Promise { // Handle onRehydratingStart callback if (this.config.onRehydratingStart) { - try { - await this.config.onRehydratingStart(message); - } catch (error) { - throw error; // Re-throw the error - } + await this.config.onRehydratingStart(message); } if (typeof message !== "string") { @@ -153,17 +137,15 @@ export class MaskingParser { const reversedTransformers = this.transformers.slice().reverse(); for (const transformer of reversedTransformers) { // Check if the result is a Promise and use await, otherwise use it directly - const result = transformer.rehydrate(rehydratedMessage, rehydrationState); - rehydratedMessage = result instanceof Promise ? await result : result; + rehydratedMessage = await transformer.rehydrate( + rehydratedMessage, + rehydrationState + ); } // Handle onRehydratingEnd callback if (this.config.onRehydratingEnd) { - try { - await this.config.onRehydratingEnd(rehydratedMessage); - } catch (error) { - throw error; // Re-throw the error - } + await this.config.onRehydratingEnd(rehydratedMessage); } // Return the fully rehydrated message after all transformers have been applied. diff --git a/langchain/src/experimental/masking/regex_masking_transformer.ts b/langchain/src/experimental/masking/regex_masking_transformer.ts index 029853ff2875..c52a8b6f83f4 100644 --- a/langchain/src/experimental/masking/regex_masking_transformer.ts +++ b/langchain/src/experimental/masking/regex_masking_transformer.ts @@ -1,10 +1,11 @@ import { MaskingTransformer } from "./transformer.js"; -import { HashFunction, MaskingPattern } from "./types.js"; +import type { HashFunction, MaskingPattern } from "./types.js"; /** * RegexMaskingTransformer class for masking and rehydrating messages with Regex. */ export class RegexMaskingTransformer extends MaskingTransformer { private patterns: { [key: string]: MaskingPattern }; + private hashFunction: HashFunction; /** @@ -38,13 +39,14 @@ export class RegexMaskingTransformer extends MaskingTransformer { * @param patterns - The patterns object to validate. */ private validatePatterns(patterns: { [key: string]: MaskingPattern }) { - for (const key in patterns) { + for (const key of Object.keys(patterns)) { const pattern = patterns[key]; // Checks that each pattern is an object and has a regex property that is an instance of RegExp. // Throws an error if these conditions are not met, indicating an invalid pattern configuration. if ( !pattern || typeof pattern !== "object" || + // eslint-disable-next-line no-instanceof/no-instanceof !(pattern.regex instanceof RegExp) ) { throw new Error("Invalid pattern configuration."); @@ -58,7 +60,7 @@ export class RegexMaskingTransformer extends MaskingTransformer { * @param state - The current state containing original values. * @returns A tuple of the masked message and the updated state. */ - transform( + async transform( message: string, state: Map ): Promise<[string, Map]> { @@ -68,6 +70,7 @@ export class RegexMaskingTransformer extends MaskingTransformer { ); } + // eslint-disable-next-line no-instanceof/no-instanceof if (!(state instanceof Map)) { throw new TypeError( "RegexMaskingTransformer.transform Error: The 'state' argument must be an instance of Map." @@ -78,10 +81,10 @@ export class RegexMaskingTransformer extends MaskingTransformer { let processedMessage = message; // Initialize original values map with the current state or a new map - let originalValues = state || new Map(); + const originalValues = state || new Map(); // Iterate over each pattern defined in the transformer - for (const key in this.patterns) { + for (const key of Object.keys(this.patterns)) { const pattern = this.patterns[key]; // Apply the current pattern's regex to the message @@ -103,7 +106,7 @@ export class RegexMaskingTransformer extends MaskingTransformer { // Return the fully masked message and the state map with all original values // Wrap the synchronous return values in Promise.resolve() to maintain compatibility // with the MaskingParser's expectation of a Promise return type. - return Promise.resolve([processedMessage, originalValues]); + return [processedMessage, originalValues]; } /** @@ -112,13 +115,17 @@ export class RegexMaskingTransformer extends MaskingTransformer { * @param state - The state map containing mappings of masked values to their original values. * @returns The rehydrated (original) message. */ - rehydrate(message: string, state: Map): Promise { + async rehydrate( + message: string, + state: Map + ): Promise { if (typeof message !== "string") { throw new TypeError( "RegexMaskingTransformer.rehydrate Error: The 'message' argument must be a string." ); } + // eslint-disable-next-line no-instanceof/no-instanceof if (!(state instanceof Map)) { throw new TypeError( "RegexMaskingTransformer.rehydrate Error: The 'state' argument must be an instance of Map." @@ -139,7 +146,7 @@ export class RegexMaskingTransformer extends MaskingTransformer { message ); - return Promise.resolve(rehydratedMessage); + return rehydratedMessage; } /** @@ -150,7 +157,7 @@ export class RegexMaskingTransformer extends MaskingTransformer { private defaultHashFunction(input: string): string { let hash = 0; // Iterate over each character in the input string - for (let i = 0; i < input.length; i++) { + for (let i = 0; i < input.length; i += 1) { // Get ASCII value of the character const char = input.charCodeAt(i); // Combine the current hash with the new character and ensure it remains a 32-bit integer diff --git a/langchain/src/experimental/masking/tests/mask-integration.test.ts b/langchain/src/experimental/masking/tests/masking-extended.test.ts similarity index 97% rename from langchain/src/experimental/masking/tests/mask-integration.test.ts rename to langchain/src/experimental/masking/tests/masking-extended.test.ts index cb8a6bf20b17..8342063eae1c 100644 --- a/langchain/src/experimental/masking/tests/mask-integration.test.ts +++ b/langchain/src/experimental/masking/tests/masking-extended.test.ts @@ -1,4 +1,4 @@ -// yarn test:single src/experimental/masking/tests/mask-integration.test.ts +// yarn test:single src/experimental/masking/tests/masking-extended.test.ts import { MaskingParser, RegexMaskingTransformer } from "../index.js"; // Mock database for simulating state storage and retrieval diff --git a/langchain/src/experimental/masking/tests/masking.test.ts b/langchain/src/experimental/masking/tests/masking.test.ts index 96bdc7efc1ea..b43966c0d952 100644 --- a/langchain/src/experimental/masking/tests/masking.test.ts +++ b/langchain/src/experimental/masking/tests/masking.test.ts @@ -1,10 +1,12 @@ +/* eslint-disable no-promise-executor-return */ +/* eslint-disable @typescript-eslint/no-explicit-any */ // yarn test:single src/experimental/masking/tests/masking.test.ts +import { jest } from "@jest/globals"; import { MaskingParser, RegexMaskingTransformer, MaskingTransformer, } from "../index.js"; -import { jest } from "@jest/globals"; describe("MaskingParser and PIIMaskingTransformer", () => { describe("Masking with Static Identifiers", () => { @@ -45,7 +47,7 @@ describe("MaskingParser and PIIMaskingTransformer", () => { function generateLargeMessage() { let largeMessage = ""; - for (let i = 0; i < 10000; i++) { + for (let i = 0; i < 10000; i += 1) { // Adjust the number for desired message size largeMessage += `User${i}: jane.doe${i}@email.com, 555-123-${i .toString() @@ -175,14 +177,13 @@ describe("MaskingParser and PIIMaskingTransformer", () => { maskingParser.addTransformer(piiMaskingTransformer); }); - const customHashFunction = (input: string) => { - // A simple hash function that creates a mock hash representation of the input. - // This is just for demonstration purposes and not a secure hashing method. - return input + // A simple hash function that creates a mock hash representation of the input. + // This is just for demonstration purposes and not a secure hashing method. + const customHashFunction = (input: string) => + input .split("") .map(() => "*") .join(""); - }; it("should mask email and phone using custom hash function", async () => { const piiMaskingTransformer = new RegexMaskingTransformer( { @@ -205,10 +206,12 @@ describe("MaskingParser and PIIMaskingTransformer", () => { const maskedMessage = await maskingParser.mask(message); // The lengths of the masked parts should be equal to the lengths of the original email and phone number. - const expectedEmailMask = - "custom-email-" + "*".repeat("jane.doe@email.com".length); - const expectedPhoneMask = - "custom-phone-" + "*".repeat("555-123-4567".length); + const expectedEmailMask = `custom-email-${"*".repeat( + "jane.doe@email.com".length + )}`; + const expectedPhoneMask = `custom-phone-${"*".repeat( + "555-123-4567".length + )}`; expect(maskedMessage).toContain(expectedEmailMask); expect(maskedMessage).toContain(expectedPhoneMask); @@ -280,28 +283,31 @@ describe("MaskingParser and PIIMaskingTransformer", () => { }); describe("Error Handling in PIIMaskingTransformer", () => { - it("throws an error for invalid message type in transform", () => { + it("throws an error for invalid message type in transform", async () => { const transformer = new RegexMaskingTransformer({}); const invalidMessage: any = 123; // intentionally incorrect type const state = new Map(); - expect(() => transformer.transform(invalidMessage, state)).toThrow( - "The 'message' argument must be a string." - ); + await expect( + transformer.transform(invalidMessage, state) + ).rejects.toThrow("The 'message' argument must be a string."); }); - it("throws an error for invalid state type in transform", () => { + it("throws an error for invalid state type in transform", async () => { const transformer = new RegexMaskingTransformer({}); const message = "Some message"; const invalidState: any = {}; // intentionally incorrect type - expect(() => transformer.transform(message, invalidState)).toThrow( - "The 'state' argument must be an instance of Map." - ); + await expect( + transformer.transform(message, invalidState) + ).rejects.toThrow("The 'state' argument must be an instance of Map."); }); it("throws an error when initialized with invalid regex pattern", () => { expect(() => { - // @ts-expect-error - new RegexMaskingTransformer({ invalid: { regex: null } }); + // @ts-expect-error Should throw with invalid regex + const transformer = new RegexMaskingTransformer({ + invalid: { regex: null }, + }); + console.log(transformer); }).toThrow("Invalid pattern configuration."); }); }); @@ -506,8 +512,8 @@ describe("MaskingParser and PIIMaskingTransformer", () => { return [transformedMessage, newState]; }, // Mock or placeholder rehydrate method - rehydrate(message, state) { - return Promise.resolve(message); + async rehydrate(message, _state) { + return message; }, }; diff --git a/langchain/src/experimental/masking/transformer.ts b/langchain/src/experimental/masking/transformer.ts index a6d42bea00c4..05582df2ea8a 100644 --- a/langchain/src/experimental/masking/transformer.ts +++ b/langchain/src/experimental/masking/transformer.ts @@ -6,6 +6,7 @@ export abstract class MaskingTransformer { message: string, state?: Map ): Promise<[string, Map]>; + abstract rehydrate( message: string, state: Map diff --git a/langchain/src/experimental/masking/types.ts b/langchain/src/experimental/masking/types.ts index 4bc475f768c2..70cac7e7b878 100644 --- a/langchain/src/experimental/masking/types.ts +++ b/langchain/src/experimental/masking/types.ts @@ -21,7 +21,9 @@ export type MaskingPattern = { mask?: (match: string) => string; }; -export type HookFunction = (message: string) => void; +export type HookFunction = + | ((message: string) => Promise) + | ((message: string) => void); /** * Represents a function that can hash a string input. From 13d4f19e0459a92c843a4fe941e7a29f349da4a2 Mon Sep 17 00:00:00 2001 From: jacoblee93 Date: Wed, 6 Dec 2023 17:08:13 -0800 Subject: [PATCH 13/14] Fix build --- langchain/src/experimental/masking/tests/masking.test.ts | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/langchain/src/experimental/masking/tests/masking.test.ts b/langchain/src/experimental/masking/tests/masking.test.ts index b43966c0d952..a829a75d4f92 100644 --- a/langchain/src/experimental/masking/tests/masking.test.ts +++ b/langchain/src/experimental/masking/tests/masking.test.ts @@ -303,8 +303,8 @@ describe("MaskingParser and PIIMaskingTransformer", () => { it("throws an error when initialized with invalid regex pattern", () => { expect(() => { - // @ts-expect-error Should throw with invalid regex const transformer = new RegexMaskingTransformer({ + // @ts-expect-error Should throw with invalid regex invalid: { regex: null }, }); console.log(transformer); From 07f2ef6ec3acbebde6272795e9fe207c5c9fa825 Mon Sep 17 00:00:00 2001 From: Jacob Lee Date: Wed, 6 Dec 2023 17:18:51 -0800 Subject: [PATCH 14/14] Update mask.mdx --- docs/core_docs/docs/modules/experimental/mask/mask.mdx | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/core_docs/docs/modules/experimental/mask/mask.mdx b/docs/core_docs/docs/modules/experimental/mask/mask.mdx index 0ee3db205c7e..2827bb708f61 100644 --- a/docs/core_docs/docs/modules/experimental/mask/mask.mdx +++ b/docs/core_docs/docs/modules/experimental/mask/mask.mdx @@ -9,7 +9,7 @@ A customer support system receives messages containing sensitive customer inform ## Get started import CodeBlock from "@theme/CodeBlock"; -import ExampleBasic from "@examples/chains/llm_chain_stream.ts"; +import ExampleBasic from "@examples/experimental/masking/basic.ts"; import ExampleNext from "@examples/experimental/masking/next.ts"; import ExampleKitchenSink from "@examples/experimental/masking/kitchen_sink.ts";