From 04e1efb32826d0cede524b964e7f67099e991c5a Mon Sep 17 00:00:00 2001 From: Rafael Mosca Date: Wed, 25 Sep 2024 19:13:24 +0200 Subject: [PATCH] feat(bedrock): implement new data source structure (#668) * feat(bedrock): add data source implementation and new chuncking strategies --------- Co-authored-by: Alain Krok --- .gitignore | 9 - .npmignore | 2 - .projen/tasks.json | 69 --- apidocs/namespaces/bedrock/README.md | 41 +- .../bedrock/classes/ChunkingStrategy.md | 129 +++++ .../bedrock/classes/ConfluenceDataSource.md | 445 +++++++++++++++ .../bedrock/classes/CustomTransformation.md | 66 +++ .../namespaces/bedrock/classes/DataSource.md | 331 +++++++++++ .../bedrock/classes/DataSourceBase.md | 360 ++++++++++++ .../bedrock/classes/DataSourceNew.md | 409 ++++++++++++++ .../bedrock/classes/KnowledgeBase.md | 358 +++++++++++- .../bedrock/classes/ParsingStategy.md | 64 +++ .../bedrock/classes/S3DataSource.md | 334 ++++++++++- .../bedrock/classes/SalesforceDataSource.md | 441 +++++++++++++++ .../bedrock/classes/SharepointDataSource.md | 449 +++++++++++++++ .../bedrock/classes/WebCrawlerDataSource.md | 441 +++++++++++++++ .../bedrock/enumerations/ChunkingStrategy.md | 42 -- .../ConfluenceDataSourceAuthType.md | 35 ++ .../enumerations/ConfluenceObjectType.md | 40 ++ .../bedrock/enumerations/CrawlingScope.md | 36 ++ .../enumerations/DataDeletionPolicy.md | 28 + .../bedrock/enumerations/DataSourceType.md | 50 ++ .../SalesforceDataSourceAuthType.md | 20 + .../enumerations/SalesforceObjectType.md | 135 +++++ .../SharepointDataSourceAuthType.md | 22 + .../enumerations/SharepointObjectType.md | 33 ++ .../enumerations/TransformationStep.md | 18 + .../interfaces/ConfluenceCrawlingFilters.md | 56 ++ .../ConfluenceDataSourceAssociationProps.md | 197 +++++++ .../interfaces/ConfluenceDataSourceProps.md | 217 ++++++++ .../bedrock/interfaces/CrawlingFilters.md | 26 + .../interfaces/DataSourceAssociationProps.md | 117 ++++ .../FoundationModelParsingStategyProps.md | 32 ++ 
.../interfaces/HierarchicalChunkingProps.md | 33 ++ .../bedrock/interfaces/IDataSource.md | 97 ++++ .../bedrock/interfaces/IKnowledgeBase.md | 199 +++++++ .../interfaces/KnowledgeBaseAttributes.md | 37 ++ .../bedrock/interfaces/KnowledgeBaseProps.md | 2 +- .../LambdaCustomTransformationProps.md | 32 ++ .../S3DataSourceAssociationProps.md | 167 ++++++ .../bedrock/interfaces/S3DataSourceProps.md | 124 ++++- .../interfaces/SalesforceCrawlingFilters.md | 33 ++ .../SalesforceDataSourceAssociationProps.md | 183 +++++++ .../interfaces/SalesforceDataSourceProps.md | 199 +++++++ .../interfaces/SharepointCrawlingFilters.md | 50 ++ .../SharepointDataSourceAssociationProps.md | 212 +++++++ .../interfaces/SharepointDataSourceProps.md | 236 ++++++++ .../WebCrawlerDataSourceAssociationProps.md | 198 +++++++ .../interfaces/WebCrawlerDataSourceProps.md | 218 ++++++++ package.json | 5 - src/cdk-lib/bedrock/README.md | 238 +++++++- .../bedrock/data-sources/base-data-source.ts | 248 +++++++++ src/cdk-lib/bedrock/data-sources/chunking.ts | 183 +++++++ .../data-sources/confluence-data-source.ts | 240 ++++++++ .../data-sources/custom-transformation.ts | 117 ++++ .../data-sources/default-parsing-prompt.ts | 66 +++ src/cdk-lib/bedrock/data-sources/parsing.ts | 96 ++++ .../bedrock/data-sources/s3-data-source.ts | 141 +++++ .../data-sources/salesforce-data-source.ts | 217 ++++++++ .../data-sources/sharepoint-data-source.ts | 234 ++++++++ .../data-sources/web-crawler-data-source.ts | 190 +++++++ src/cdk-lib/bedrock/index.ts | 10 +- src/cdk-lib/bedrock/knowledge-base.ts | 176 +++++- src/cdk-lib/bedrock/s3-data-source.ts | 219 -------- .../data-sources/other-data-sources.test.ts | 517 ++++++++++++++++++ .../data-sources/s3-data-source.test.ts | 298 ++++++++++ .../bedrock/integ-tests/prompts.integ.ts | 81 --- test/cdk-lib/bedrock/knowledge-base.test.ts | 96 +++- test/cdk-lib/bedrock/s3-data-source.test.ts | 115 ---- 69 files changed, 9920 insertions(+), 639 deletions(-) create mode 100644 
apidocs/namespaces/bedrock/classes/ChunkingStrategy.md create mode 100644 apidocs/namespaces/bedrock/classes/ConfluenceDataSource.md create mode 100644 apidocs/namespaces/bedrock/classes/CustomTransformation.md create mode 100644 apidocs/namespaces/bedrock/classes/DataSource.md create mode 100644 apidocs/namespaces/bedrock/classes/DataSourceBase.md create mode 100644 apidocs/namespaces/bedrock/classes/DataSourceNew.md create mode 100644 apidocs/namespaces/bedrock/classes/ParsingStategy.md create mode 100644 apidocs/namespaces/bedrock/classes/SalesforceDataSource.md create mode 100644 apidocs/namespaces/bedrock/classes/SharepointDataSource.md create mode 100644 apidocs/namespaces/bedrock/classes/WebCrawlerDataSource.md delete mode 100644 apidocs/namespaces/bedrock/enumerations/ChunkingStrategy.md create mode 100644 apidocs/namespaces/bedrock/enumerations/ConfluenceDataSourceAuthType.md create mode 100644 apidocs/namespaces/bedrock/enumerations/ConfluenceObjectType.md create mode 100644 apidocs/namespaces/bedrock/enumerations/CrawlingScope.md create mode 100644 apidocs/namespaces/bedrock/enumerations/DataDeletionPolicy.md create mode 100644 apidocs/namespaces/bedrock/enumerations/DataSourceType.md create mode 100644 apidocs/namespaces/bedrock/enumerations/SalesforceDataSourceAuthType.md create mode 100644 apidocs/namespaces/bedrock/enumerations/SalesforceObjectType.md create mode 100644 apidocs/namespaces/bedrock/enumerations/SharepointDataSourceAuthType.md create mode 100644 apidocs/namespaces/bedrock/enumerations/SharepointObjectType.md create mode 100644 apidocs/namespaces/bedrock/enumerations/TransformationStep.md create mode 100644 apidocs/namespaces/bedrock/interfaces/ConfluenceCrawlingFilters.md create mode 100644 apidocs/namespaces/bedrock/interfaces/ConfluenceDataSourceAssociationProps.md create mode 100644 apidocs/namespaces/bedrock/interfaces/ConfluenceDataSourceProps.md create mode 100644 apidocs/namespaces/bedrock/interfaces/CrawlingFilters.md create 
mode 100644 apidocs/namespaces/bedrock/interfaces/DataSourceAssociationProps.md create mode 100644 apidocs/namespaces/bedrock/interfaces/FoundationModelParsingStategyProps.md create mode 100644 apidocs/namespaces/bedrock/interfaces/HierarchicalChunkingProps.md create mode 100644 apidocs/namespaces/bedrock/interfaces/IDataSource.md create mode 100644 apidocs/namespaces/bedrock/interfaces/IKnowledgeBase.md create mode 100644 apidocs/namespaces/bedrock/interfaces/KnowledgeBaseAttributes.md create mode 100644 apidocs/namespaces/bedrock/interfaces/LambdaCustomTransformationProps.md create mode 100644 apidocs/namespaces/bedrock/interfaces/S3DataSourceAssociationProps.md create mode 100644 apidocs/namespaces/bedrock/interfaces/SalesforceCrawlingFilters.md create mode 100644 apidocs/namespaces/bedrock/interfaces/SalesforceDataSourceAssociationProps.md create mode 100644 apidocs/namespaces/bedrock/interfaces/SalesforceDataSourceProps.md create mode 100644 apidocs/namespaces/bedrock/interfaces/SharepointCrawlingFilters.md create mode 100644 apidocs/namespaces/bedrock/interfaces/SharepointDataSourceAssociationProps.md create mode 100644 apidocs/namespaces/bedrock/interfaces/SharepointDataSourceProps.md create mode 100644 apidocs/namespaces/bedrock/interfaces/WebCrawlerDataSourceAssociationProps.md create mode 100644 apidocs/namespaces/bedrock/interfaces/WebCrawlerDataSourceProps.md create mode 100644 src/cdk-lib/bedrock/data-sources/base-data-source.ts create mode 100644 src/cdk-lib/bedrock/data-sources/chunking.ts create mode 100644 src/cdk-lib/bedrock/data-sources/confluence-data-source.ts create mode 100644 src/cdk-lib/bedrock/data-sources/custom-transformation.ts create mode 100644 src/cdk-lib/bedrock/data-sources/default-parsing-prompt.ts create mode 100644 src/cdk-lib/bedrock/data-sources/parsing.ts create mode 100644 src/cdk-lib/bedrock/data-sources/s3-data-source.ts create mode 100644 src/cdk-lib/bedrock/data-sources/salesforce-data-source.ts create mode 100644 
src/cdk-lib/bedrock/data-sources/sharepoint-data-source.ts create mode 100644 src/cdk-lib/bedrock/data-sources/web-crawler-data-source.ts delete mode 100644 src/cdk-lib/bedrock/s3-data-source.ts create mode 100644 test/cdk-lib/bedrock/data-sources/other-data-sources.test.ts create mode 100644 test/cdk-lib/bedrock/data-sources/s3-data-source.test.ts delete mode 100644 test/cdk-lib/bedrock/integ-tests/prompts.integ.ts delete mode 100644 test/cdk-lib/bedrock/s3-data-source.test.ts diff --git a/.gitignore b/.gitignore index 90f1bd2c..ac3afd58 100644 --- a/.gitignore +++ b/.gitignore @@ -56,15 +56,6 @@ junit.xml !/.eslintrc.json .jsii tsconfig.json -test/cdk-lib/bedrock/integ-tests/.tmp -test/cdk-lib/bedrock/integ-tests/prompts.integ.snapshot/asset.* -test/cdk-lib/bedrock/integ-tests/prompts.integ.snapshot/**/asset.* -test/cdk-lib/bedrock/integ-tests/prompts.integ.snapshot/cdk.out -test/cdk-lib/bedrock/integ-tests/prompts.integ.snapshot/**/cdk.out -test/cdk-lib/bedrock/integ-tests/prompts.integ.snapshot/manifest.json -test/cdk-lib/bedrock/integ-tests/prompts.integ.snapshot/**/manifest.json -test/cdk-lib/bedrock/integ-tests/prompts.integ.snapshot/tree.json -test/cdk-lib/bedrock/integ-tests/prompts.integ.snapshot/**/tree.json test/patterns/gen-ai/aws-aoss-cw-dashboard/integ-tests/.tmp test/patterns/gen-ai/aws-aoss-cw-dashboard/integ-tests/aws-aoss-cw-dashboard.integ.snapshot/asset.* test/patterns/gen-ai/aws-aoss-cw-dashboard/integ-tests/aws-aoss-cw-dashboard.integ.snapshot/**/asset.* diff --git a/.npmignore b/.npmignore index 8fa5f3ec..b523d851 100644 --- a/.npmignore +++ b/.npmignore @@ -22,8 +22,6 @@ dist tsconfig.tsbuildinfo /.eslintrc.json !.jsii -test/cdk-lib/bedrock/integ-tests/.tmp -test/cdk-lib/bedrock/integ-tests/prompts.integ.snapshot test/patterns/gen-ai/aws-aoss-cw-dashboard/integ-tests/.tmp test/patterns/gen-ai/aws-aoss-cw-dashboard/integ-tests/aws-aoss-cw-dashboard.integ.snapshot /docs/ diff --git a/.projen/tasks.json b/.projen/tasks.json index 
d52b00b9..931ef94d 100644 --- a/.projen/tasks.json +++ b/.projen/tasks.json @@ -227,76 +227,10 @@ } ] }, - "integ:prompts:assert": { - "name": "integ:prompts:assert", - "description": "assert the snapshot of integration test 'prompts'", - "steps": [ - { - "exec": "[ -d \"test/cdk-lib/bedrock/integ-tests/prompts.integ.snapshot\" ] || (echo \"No snapshot available for integration test 'prompts'. Run 'projen integ:prompts:deploy' to capture.\" && exit 1)" - }, - { - "exec": "cdk synth --app \"ts-node -P tsconfig.dev.json test/cdk-lib/bedrock/integ-tests/prompts.integ.ts\" --no-notices --no-version-reporting --no-asset-metadata --no-path-metadata -o test/cdk-lib/bedrock/integ-tests/.tmp/prompts.integ/assert.cdk.out > /dev/null" - }, - { - "exec": "diff -r -x asset.* -x cdk.out -x manifest.json -x tree.json test/cdk-lib/bedrock/integ-tests/prompts.integ.snapshot/ test/cdk-lib/bedrock/integ-tests/.tmp/prompts.integ/assert.cdk.out/" - } - ] - }, - "integ:prompts:deploy": { - "name": "integ:prompts:deploy", - "description": "deploy integration test 'prompts' and capture snapshot", - "steps": [ - { - "exec": "rm -fr test/cdk-lib/bedrock/integ-tests/.tmp/prompts.integ/deploy.cdk.out" - }, - { - "exec": "cdk deploy --app \"ts-node -P tsconfig.dev.json test/cdk-lib/bedrock/integ-tests/prompts.integ.ts\" --no-notices --no-version-reporting --no-asset-metadata --no-path-metadata '**' --require-approval=never -o test/cdk-lib/bedrock/integ-tests/.tmp/prompts.integ/deploy.cdk.out" - }, - { - "exec": "rm -fr test/cdk-lib/bedrock/integ-tests/prompts.integ.snapshot" - }, - { - "exec": "mv test/cdk-lib/bedrock/integ-tests/.tmp/prompts.integ/deploy.cdk.out test/cdk-lib/bedrock/integ-tests/prompts.integ.snapshot" - }, - { - "spawn": "integ:prompts:destroy" - } - ] - }, - "integ:prompts:destroy": { - "name": "integ:prompts:destroy", - "description": "destroy integration test 'prompts'", - "steps": [ - { - "exec": "cdk destroy --app test/cdk-lib/bedrock/integ-tests/prompts.integ.snapshot 
'**' --no-version-reporting" - } - ] - }, - "integ:prompts:snapshot": { - "name": "integ:prompts:snapshot", - "description": "update snapshot for integration test \"prompts\"", - "steps": [ - { - "exec": "cdk synth --app \"ts-node -P tsconfig.dev.json test/cdk-lib/bedrock/integ-tests/prompts.integ.ts\" --no-notices --no-version-reporting --no-asset-metadata --no-path-metadata -o test/cdk-lib/bedrock/integ-tests/prompts.integ.snapshot > /dev/null" - } - ] - }, - "integ:prompts:watch": { - "name": "integ:prompts:watch", - "description": "watch integration test 'prompts' (without updating snapshots)", - "steps": [ - { - "exec": "cdk watch --app \"ts-node -P tsconfig.dev.json test/cdk-lib/bedrock/integ-tests/prompts.integ.ts\" --no-notices --no-version-reporting --no-asset-metadata --no-path-metadata '**' -o test/cdk-lib/bedrock/integ-tests/.tmp/prompts.integ/deploy.cdk.out" - } - ] - }, "integ:snapshot-all": { "name": "integ:snapshot-all", "description": "update snapshot for all integration tests", "steps": [ - { - "spawn": "integ:prompts:snapshot" - }, { "spawn": "integ:aws-aoss-cw-dashboard:snapshot" } @@ -439,9 +373,6 @@ { "spawn": "eslint" }, - { - "spawn": "integ:prompts:assert" - }, { "spawn": "integ:aws-aoss-cw-dashboard:assert" } diff --git a/apidocs/namespaces/bedrock/README.md b/apidocs/namespaces/bedrock/README.md index 241d82a9..949695f7 100644 --- a/apidocs/namespaces/bedrock/README.md +++ b/apidocs/namespaces/bedrock/README.md @@ -11,8 +11,12 @@ ### Enumerations - [CanadaSpecific](enumerations/CanadaSpecific.md) -- [ChunkingStrategy](enumerations/ChunkingStrategy.md) +- [ConfluenceDataSourceAuthType](enumerations/ConfluenceDataSourceAuthType.md) +- [ConfluenceObjectType](enumerations/ConfluenceObjectType.md) - [ContextualGroundingFilterConfigType](enumerations/ContextualGroundingFilterConfigType.md) +- [CrawlingScope](enumerations/CrawlingScope.md) +- [DataDeletionPolicy](enumerations/DataDeletionPolicy.md) +- 
[DataSourceType](enumerations/DataSourceType.md) - [FiltersConfigStrength](enumerations/FiltersConfigStrength.md) - [FiltersConfigType](enumerations/FiltersConfigType.md) - [Finance](enumerations/Finance.md) @@ -24,6 +28,11 @@ - [PromptState](enumerations/PromptState.md) - [PromptTemplateType](enumerations/PromptTemplateType.md) - [PromptType](enumerations/PromptType.md) +- [SalesforceDataSourceAuthType](enumerations/SalesforceDataSourceAuthType.md) +- [SalesforceObjectType](enumerations/SalesforceObjectType.md) +- [SharePointDataSourceAuthType](enumerations/SharePointDataSourceAuthType.md) +- [SharePointObjectType](enumerations/SharePointObjectType.md) +- [TransformationStep](enumerations/TransformationStep.md) - [UKSpecific](enumerations/UKSpecific.md) - [USASpecific](enumerations/USASpecific.md) @@ -34,18 +43,28 @@ - [AgentAlias](classes/AgentAlias.md) - [ApiSchema](classes/ApiSchema.md) - [BedrockFoundationModel](classes/BedrockFoundationModel.md) +- [ChunkingStrategy](classes/ChunkingStrategy.md) +- [ConfluenceDataSource](classes/ConfluenceDataSource.md) - [ContentPolicyConfig](classes/ContentPolicyConfig.md) +- [CustomTransformation](classes/CustomTransformation.md) +- [DataSource](classes/DataSource.md) +- [DataSourceBase](classes/DataSourceBase.md) +- [DataSourceNew](classes/DataSourceNew.md) - [Guardrail](classes/Guardrail.md) - [GuardrailVersion](classes/GuardrailVersion.md) - [InlineApiSchema](classes/InlineApiSchema.md) - [KnowledgeBase](classes/KnowledgeBase.md) +- [ParsingStategy](classes/ParsingStategy.md) - [Prompt](classes/Prompt.md) - [PromptVariant](classes/PromptVariant.md) - [PromptVersion](classes/PromptVersion.md) - [S3ApiSchema](classes/S3ApiSchema.md) - [S3DataSource](classes/S3DataSource.md) +- [SalesforceDataSource](classes/SalesforceDataSource.md) - [SensitiveInformationPolicyConfig](classes/SensitiveInformationPolicyConfig.md) +- [SharePointDataSource](classes/SharePointDataSource.md) - [Topic](classes/Topic.md) +- 
[WebCrawlerDataSource](classes/WebCrawlerDataSource.md) ### Interfaces @@ -57,23 +76,43 @@ - [ApiSchemaConfig](interfaces/ApiSchemaConfig.md) - [BedrockFoundationModelProps](interfaces/BedrockFoundationModelProps.md) - [CommonPromptVariantProps](interfaces/CommonPromptVariantProps.md) +- [ConfluenceCrawlingFilters](interfaces/ConfluenceCrawlingFilters.md) +- [ConfluenceDataSourceAssociationProps](interfaces/ConfluenceDataSourceAssociationProps.md) +- [ConfluenceDataSourceProps](interfaces/ConfluenceDataSourceProps.md) - [ContentPolicyConfigProps](interfaces/ContentPolicyConfigProps.md) - [ContextualGroundingPolicyConfigProps](interfaces/ContextualGroundingPolicyConfigProps.md) +- [CrawlingFilters](interfaces/CrawlingFilters.md) +- [DataSourceAssociationProps](interfaces/DataSourceAssociationProps.md) +- [FoundationModelParsingStategyProps](interfaces/FoundationModelParsingStategyProps.md) - [GuardrailConfiguration](interfaces/GuardrailConfiguration.md) - [GuardrailProps](interfaces/GuardrailProps.md) +- [HierarchicalChunkingProps](interfaces/HierarchicalChunkingProps.md) - [IAgentAlias](interfaces/IAgentAlias.md) +- [IDataSource](interfaces/IDataSource.md) +- [IKnowledgeBase](interfaces/IKnowledgeBase.md) - [InferenceConfiguration](interfaces/InferenceConfiguration.md) - [IPrompt](interfaces/IPrompt.md) +- [KnowledgeBaseAttributes](interfaces/KnowledgeBaseAttributes.md) - [KnowledgeBaseProps](interfaces/KnowledgeBaseProps.md) +- [LambdaCustomTransformationProps](interfaces/LambdaCustomTransformationProps.md) - [PromptConfiguration](interfaces/PromptConfiguration.md) - [PromptOverrideConfiguration](interfaces/PromptOverrideConfiguration.md) - [PromptProps](interfaces/PromptProps.md) - [PromptVersionProps](interfaces/PromptVersionProps.md) +- [S3DataSourceAssociationProps](interfaces/S3DataSourceAssociationProps.md) - [S3DataSourceProps](interfaces/S3DataSourceProps.md) - [S3Identifier](interfaces/S3Identifier.md) +- 
[SalesforceCrawlingFilters](interfaces/SalesforceCrawlingFilters.md) +- [SalesforceDataSourceAssociationProps](interfaces/SalesforceDataSourceAssociationProps.md) +- [SalesforceDataSourceProps](interfaces/SalesforceDataSourceProps.md) - [SensitiveInformationPolicyConfigProps](interfaces/SensitiveInformationPolicyConfigProps.md) +- [SharePointCrawlingFilters](interfaces/SharePointCrawlingFilters.md) +- [SharePointDataSourceAssociationProps](interfaces/SharePointDataSourceAssociationProps.md) +- [SharePointDataSourceProps](interfaces/SharePointDataSourceProps.md) - [TextPromptVariantProps](interfaces/TextPromptVariantProps.md) - [TopicProps](interfaces/TopicProps.md) +- [WebCrawlerDataSourceAssociationProps](interfaces/WebCrawlerDataSourceAssociationProps.md) +- [WebCrawlerDataSourceProps](interfaces/WebCrawlerDataSourceProps.md) ### Functions diff --git a/apidocs/namespaces/bedrock/classes/ChunkingStrategy.md b/apidocs/namespaces/bedrock/classes/ChunkingStrategy.md new file mode 100644 index 00000000..5749cdb4 --- /dev/null +++ b/apidocs/namespaces/bedrock/classes/ChunkingStrategy.md @@ -0,0 +1,129 @@ +[**@cdklabs/generative-ai-cdk-constructs**](../../../README.md) • **Docs** + +*** + +[@cdklabs/generative-ai-cdk-constructs](../../../README.md) / [bedrock](../README.md) / ChunkingStrategy + +# Class: `abstract` ChunkingStrategy + +## Properties + +### configuration + +> `abstract` **configuration**: `ChunkingConfigurationProperty` + +The CloudFormation property representation of this configuration + +*** + +### DEFAULT + +> `readonly` `static` **DEFAULT**: [`ChunkingStrategy`](ChunkingStrategy.md) + +Fixed Sized Chunking with the default chunk size of 300 tokens and 20% overlap. + +*** + +### FIXED\_SIZE + +> `readonly` `static` **FIXED\_SIZE**: [`ChunkingStrategy`](ChunkingStrategy.md) + +Fixed Sized Chunking with the default chunk size of 300 tokens and 20% overlap. 
+You can adjust these values based on your specific requirements using the +`ChunkingStrategy.fixedSize(params)` method. + +*** + +### HIERARCHICAL\_COHERE + +> `readonly` `static` **HIERARCHICAL\_COHERE**: [`ChunkingStrategy`](ChunkingStrategy.md) + +Hierarchical Chunking with the default for Cohere Models. +- Overlap tokens: 30 +- Max parent token size: 500 +- Max child token size: 100 + +*** + +### HIERARCHICAL\_TITAN + +> `readonly` `static` **HIERARCHICAL\_TITAN**: [`ChunkingStrategy`](ChunkingStrategy.md) + +Hierarchical Chunking with the default for Titan Models. +- Overlap tokens: 60 +- Max parent token size: 1500 +- Max child token size: 300 + +*** + +### NONE + +> `readonly` `static` **NONE**: [`ChunkingStrategy`](ChunkingStrategy.md) + +Amazon Bedrock treats each file as one chunk. Suitable for documents that +are already pre-processed or text split. + +*** + +### SEMANTIC + +> `readonly` `static` **SEMANTIC**: [`ChunkingStrategy`](ChunkingStrategy.md) + +Semantic Chunking with the default of bufferSize: 0, +breakpointPercentileThreshold: 95, and maxTokens: 300. +You can adjust these values based on your specific requirements using the +`ChunkingStrategy.semantic(params)` method. + +## Methods + +### fixedSize() + +> `static` **fixedSize**(`props`): [`ChunkingStrategy`](ChunkingStrategy.md) + +Method for customizing a fixed sized chunking strategy. + +#### Parameters + +• **props**: `FixedSizeChunkingConfigurationProperty` + +#### Returns + +[`ChunkingStrategy`](ChunkingStrategy.md) + +*** + +### hierarchical() + +> `static` **hierarchical**(`props`): [`ChunkingStrategy`](ChunkingStrategy.md) + +Method for customizing a hierarchical chunking strategy. +For custom chunking, the maximum token chunk size depends on the model. 
+- Amazon Titan Text Embeddings: 8192 +- Cohere Embed models: 512 + +#### Parameters + +• **props**: [`HierarchicalChunkingProps`](../interfaces/HierarchicalChunkingProps.md) + +#### Returns + +[`ChunkingStrategy`](ChunkingStrategy.md) + +*** + +### semantic() + +> `static` **semantic**(`props`): [`ChunkingStrategy`](ChunkingStrategy.md) + +Method for customizing a semantic chunking strategy. +For custom chunking, the maximum token chunk size depends on the model. +- Amazon Titan Text Embeddings: 8192 +- Cohere Embed models: 512 + +#### Parameters + +• **props**: `SemanticChunkingConfigurationProperty` + +#### Returns + +[`ChunkingStrategy`](ChunkingStrategy.md) diff --git a/apidocs/namespaces/bedrock/classes/ConfluenceDataSource.md b/apidocs/namespaces/bedrock/classes/ConfluenceDataSource.md new file mode 100644 index 00000000..ab1c8cae --- /dev/null +++ b/apidocs/namespaces/bedrock/classes/ConfluenceDataSource.md @@ -0,0 +1,445 @@ +[**@cdklabs/generative-ai-cdk-constructs**](../../../README.md) • **Docs** + +*** + +[@cdklabs/generative-ai-cdk-constructs](../../../README.md) / [bedrock](../README.md) / ConfluenceDataSource + +# Class: ConfluenceDataSource + +Sets up a Confluence Data Source to be added to a knowledge base. 
+ +## See + +https://docs.aws.amazon.com/bedrock/latest/userguide/confluence-data-source-connector.html + +## Extends + +- [`DataSourceNew`](DataSourceNew.md) + +## Constructors + +### new ConfluenceDataSource() + +> **new ConfluenceDataSource**(`scope`, `id`, `props`): [`ConfluenceDataSource`](ConfluenceDataSource.md) + +#### Parameters + +• **scope**: `Construct` + +• **id**: `string` + +• **props**: [`ConfluenceDataSourceProps`](../interfaces/ConfluenceDataSourceProps.md) + +#### Returns + +[`ConfluenceDataSource`](ConfluenceDataSource.md) + +#### Overrides + +[`DataSourceNew`](DataSourceNew.md).[`constructor`](DataSourceNew.md#constructors) + +## Properties + +### authSecret + +> `readonly` **authSecret**: `ISecret` + +The AWS Secrets Manager secret that stores your authentication credentials. + +*** + +### confluenceUrl + +> `readonly` **confluenceUrl**: `string` + +The Confluence host URL or instance URL. + +*** + +### dataSourceId + +> `readonly` **dataSourceId**: `string` + +The unique identifier of the data source. + +#### Example + +```ts +'JHUEVXUZMU' +``` + +#### Overrides + +[`DataSourceNew`](DataSourceNew.md).[`dataSourceId`](DataSourceNew.md#datasourceid) + +*** + +### dataSourceName + +> `readonly` **dataSourceName**: `string` + +The name of the data source. + +#### Overrides + +[`DataSourceNew`](DataSourceNew.md).[`dataSourceName`](DataSourceNew.md#datasourcename) + +*** + +### dataSourceType + +> `readonly` **dataSourceType**: [`DataSourceType`](../enumerations/DataSourceType.md) + +The type of data source. + +#### Overrides + +[`DataSourceNew`](DataSourceNew.md).[`dataSourceType`](DataSourceNew.md#datasourcetype) + +*** + +### env + +> `readonly` **env**: `ResourceEnvironment` + +The environment this resource belongs to. 
+For resources that are created and managed by the CDK +(generally, those created by creating new class instances like Role, Bucket, etc.), +this is always the same as the environment of the stack they belong to; +however, for imported resources +(those obtained from static methods like fromRoleArn, fromBucketName, etc.), +that might be different than the stack they were imported into. + +#### Inherited from + +[`DataSourceNew`](DataSourceNew.md).[`env`](DataSourceNew.md#env) + +*** + +### kmsKey? + +> `readonly` `optional` **kmsKey**: `IKey` + +The KMS key to use to encrypt the data source. + +#### Overrides + +[`DataSourceNew`](DataSourceNew.md).[`kmsKey`](DataSourceNew.md#kmskey) + +*** + +### knowledgeBase + +> `readonly` **knowledgeBase**: [`IKnowledgeBase`](../interfaces/IKnowledgeBase.md) + +The knowledge base associated with the data source. + +#### Overrides + +[`DataSourceNew`](DataSourceNew.md).[`knowledgeBase`](DataSourceNew.md#knowledgebase) + +*** + +### node + +> `readonly` **node**: `Node` + +The tree node. + +#### Inherited from + +[`DataSourceNew`](DataSourceNew.md).[`node`](DataSourceNew.md#node) + +*** + +### physicalName + +> `protected` `readonly` **physicalName**: `string` + +Returns a string-encoded token that resolves to the physical name that +should be passed to the CloudFormation resource. + +This value will resolve to one of the following: +- a concrete value (e.g. `"my-awesome-bucket"`) +- `undefined`, when a name should be generated by CloudFormation +- a concrete name generated automatically during synthesis, in + cross-environment scenarios. + +#### Inherited from + +[`DataSourceNew`](DataSourceNew.md).[`physicalName`](DataSourceNew.md#physicalname) + +*** + +### stack + +> `readonly` **stack**: `Stack` + +The stack in which this resource is defined. 
+ +#### Inherited from + +[`DataSourceNew`](DataSourceNew.md).[`stack`](DataSourceNew.md#stack) + +## Methods + +### \_enableCrossEnvironment() + +> **\_enableCrossEnvironment**(): `void` + +**`Internal`** + +Called when this resource is referenced across environments +(account/region) in order to request that a physical name will be generated +for this resource during synthesis, so the resource can be referenced +through its absolute name/arn. + +#### Returns + +`void` + +#### Inherited from + +[`DataSourceNew`](DataSourceNew.md).[`_enableCrossEnvironment`](DataSourceNew.md#_enablecrossenvironment) + +*** + +### applyRemovalPolicy() + +> **applyRemovalPolicy**(`policy`): `void` + +Apply the given removal policy to this resource + +The Removal Policy controls what happens to this resource when it stops +being managed by CloudFormation, either because you've removed it from the +CDK application or because you've made a change that requires the resource +to be replaced. + +The resource can be deleted (`RemovalPolicy.DESTROY`), or left in your AWS +account for data recovery and cleanup later (`RemovalPolicy.RETAIN`). + +#### Parameters + +• **policy**: `RemovalPolicy` + +#### Returns + +`void` + +#### Inherited from + +[`DataSourceNew`](DataSourceNew.md).[`applyRemovalPolicy`](DataSourceNew.md#applyremovalpolicy) + +*** + +### formatAsCfnProps() + +> **formatAsCfnProps**(`props`, `dataSourceConfiguration`): `CfnDataSourceProps` + +Formats the data source configuration properties for CloudFormation. 
+ +#### Parameters + +• **props**: [`DataSourceAssociationProps`](../interfaces/DataSourceAssociationProps.md) + +• **dataSourceConfiguration**: `DataSourceConfigurationProperty` + +#### Returns + +`CfnDataSourceProps` + +#### Inherited from + +[`DataSourceNew`](DataSourceNew.md).[`formatAsCfnProps`](DataSourceNew.md#formatascfnprops) + +*** + +### generatePhysicalName() + +> `protected` **generatePhysicalName**(): `string` + +#### Returns + +`string` + +#### Inherited from + +[`DataSourceNew`](DataSourceNew.md).[`generatePhysicalName`](DataSourceNew.md#generatephysicalname) + +*** + +### getResourceArnAttribute() + +> `protected` **getResourceArnAttribute**(`arnAttr`, `arnComponents`): `string` + +Returns an environment-sensitive token that should be used for the +resource's "ARN" attribute (e.g. `bucket.bucketArn`). + +Normally, this token will resolve to `arnAttr`, but if the resource is +referenced across environments, `arnComponents` will be used to synthesize +a concrete ARN with the resource's physical name. Make sure to reference +`this.physicalName` in `arnComponents`. + +#### Parameters + +• **arnAttr**: `string` + +The CFN attribute which resolves to the ARN of the resource. +Commonly it will be called "Arn" (e.g. `resource.attrArn`), but sometimes +it's the CFN resource's `ref`. + +• **arnComponents**: `ArnComponents` + +The format of the ARN of this resource. You must +reference `this.physicalName` somewhere within the ARN in order for +cross-environment references to work. + +#### Returns + +`string` + +#### Inherited from + +[`DataSourceNew`](DataSourceNew.md).[`getResourceArnAttribute`](DataSourceNew.md#getresourcearnattribute) + +*** + +### getResourceNameAttribute() + +> `protected` **getResourceNameAttribute**(`nameAttr`): `string` + +Returns an environment-sensitive token that should be used for the +resource's "name" attribute (e.g. `bucket.bucketName`). 
+ +Normally, this token will resolve to `nameAttr`, but if the resource is +referenced across environments, it will be resolved to `this.physicalName`, +which will be a concrete name. + +#### Parameters + +• **nameAttr**: `string` + +The CFN attribute which resolves to the resource's name. +Commonly this is the resource's `ref`. + +#### Returns + +`string` + +#### Inherited from + +[`DataSourceNew`](DataSourceNew.md).[`getResourceNameAttribute`](DataSourceNew.md#getresourcenameattribute) + +*** + +### handleCommonPermissions() + +> **handleCommonPermissions**(`props`): `void` + +Adds appropriate permissions to the KB execution role needed by the data source. + +#### Parameters + +• **props**: [`DataSourceAssociationProps`](../interfaces/DataSourceAssociationProps.md) + +#### Returns + +`void` + +#### Inherited from + +[`DataSourceNew`](DataSourceNew.md).[`handleCommonPermissions`](DataSourceNew.md#handlecommonpermissions) + +*** + +### toString() + +> **toString**(): `string` + +Returns a string representation of this construct. + +#### Returns + +`string` + +#### Inherited from + +[`DataSourceNew`](DataSourceNew.md).[`toString`](DataSourceNew.md#tostring) + +*** + +### isConstruct() + +> `static` **isConstruct**(`x`): `x is Construct` + +Checks if `x` is a construct. + +Use this method instead of `instanceof` to properly detect `Construct` +instances, even when the construct library is symlinked. + +Explanation: in JavaScript, multiple copies of the `constructs` library on +disk are seen as independent, completely different libraries. As a +consequence, the class `Construct` in each copy of the `constructs` library +is seen as a different class, and an instance of one class will not test as +`instanceof` the other class. 
`npm install` will not create installations +like this, but users may manually symlink construct libraries together or +use a monorepo tool: in those cases, multiple copies of the `constructs` +library can be accidentally installed, and `instanceof` will behave +unpredictably. It is safest to avoid using `instanceof`, and using +this type-testing method instead. + +#### Parameters + +• **x**: `any` + +Any object + +#### Returns + +`x is Construct` + +true if `x` is an object created from a class which extends `Construct`. + +#### Inherited from + +[`DataSourceNew`](DataSourceNew.md).[`isConstruct`](DataSourceNew.md#isconstruct) + +*** + +### isOwnedResource() + +> `static` **isOwnedResource**(`construct`): `boolean` + +Returns true if the construct was created by CDK, and false otherwise + +#### Parameters + +• **construct**: `IConstruct` + +#### Returns + +`boolean` + +#### Inherited from + +[`DataSourceNew`](DataSourceNew.md).[`isOwnedResource`](DataSourceNew.md#isownedresource) + +*** + +### isResource() + +> `static` **isResource**(`construct`): `construct is Resource` + +Check whether the given construct is a Resource + +#### Parameters + +• **construct**: `IConstruct` + +#### Returns + +`construct is Resource` + +#### Inherited from + +[`DataSourceNew`](DataSourceNew.md).[`isResource`](DataSourceNew.md#isresource) diff --git a/apidocs/namespaces/bedrock/classes/CustomTransformation.md b/apidocs/namespaces/bedrock/classes/CustomTransformation.md new file mode 100644 index 00000000..374b19f1 --- /dev/null +++ b/apidocs/namespaces/bedrock/classes/CustomTransformation.md @@ -0,0 +1,66 @@ +[**@cdklabs/generative-ai-cdk-constructs**](../../../README.md) • **Docs** + +*** + +[@cdklabs/generative-ai-cdk-constructs](../../../README.md) / [bedrock](../README.md) / CustomTransformation + +# Class: `abstract` CustomTransformation + +Represents a custom transformation configuration for a data source ingestion. 
+ +## See + +https://docs.aws.amazon.com/bedrock/latest/userguide/kb-chunking-parsing.html#kb-custom-transformation + +## Constructors + +### new CustomTransformation() + +> **new CustomTransformation**(): [`CustomTransformation`](CustomTransformation.md) + +#### Returns + +[`CustomTransformation`](CustomTransformation.md) + +## Properties + +### configuration + +> `abstract` **configuration**: `CustomTransformationConfigurationProperty` + +The CloudFormation property representation of this custom transformation configuration. + +## Methods + +### generatePolicyStatements() + +> `abstract` **generatePolicyStatements**(`scope`): `PolicyStatement`[] + +#### Parameters + +• **scope**: `Construct` + +#### Returns + +`PolicyStatement`[] + +*** + +### lambda() + +> `static` **lambda**(`props`): [`CustomTransformation`](CustomTransformation.md) + +This feature allows you to use a Lambda function to inject your own logic +into the knowledge base ingestion process. + +#### Parameters + +• **props**: [`LambdaCustomTransformationProps`](../interfaces/LambdaCustomTransformationProps.md) + +#### Returns + +[`CustomTransformation`](CustomTransformation.md) + +#### See + +https://github.com/aws-samples/amazon-bedrock-samples/blob/main/knowledge-bases/features-examples/02-optimizing-accuracy-retrieved-results/advanced_chunking_options.ipynb diff --git a/apidocs/namespaces/bedrock/classes/DataSource.md b/apidocs/namespaces/bedrock/classes/DataSource.md new file mode 100644 index 00000000..b333233e --- /dev/null +++ b/apidocs/namespaces/bedrock/classes/DataSource.md @@ -0,0 +1,331 @@ +[**@cdklabs/generative-ai-cdk-constructs**](../../../README.md) • **Docs** + +*** + +[@cdklabs/generative-ai-cdk-constructs](../../../README.md) / [bedrock](../README.md) / DataSource + +# Class: DataSource + +Specifies the base class for all data source resources (imported and new). 
+ +## Extends + +- [`DataSourceBase`](DataSourceBase.md) + +## Properties + +### dataSourceId + +> `readonly` **dataSourceId**: `string` + +The unique identifier of the data source. + +#### Example + +```ts +'JHUEVXUZMU' +``` + +#### Overrides + +[`DataSourceBase`](DataSourceBase.md).[`dataSourceId`](DataSourceBase.md#datasourceid) + +*** + +### env + +> `readonly` **env**: `ResourceEnvironment` + +The environment this resource belongs to. +For resources that are created and managed by the CDK +(generally, those created by creating new class instances like Role, Bucket, etc.), +this is always the same as the environment of the stack they belong to; +however, for imported resources +(those obtained from static methods like fromRoleArn, fromBucketName, etc.), +that might be different than the stack they were imported into. + +#### Inherited from + +[`DataSourceBase`](DataSourceBase.md).[`env`](DataSourceBase.md#env) + +*** + +### node + +> `readonly` **node**: `Node` + +The tree node. + +#### Inherited from + +[`DataSourceBase`](DataSourceBase.md).[`node`](DataSourceBase.md#node) + +*** + +### physicalName + +> `protected` `readonly` **physicalName**: `string` + +Returns a string-encoded token that resolves to the physical name that +should be passed to the CloudFormation resource. + +This value will resolve to one of the following: +- a concrete value (e.g. `"my-awesome-bucket"`) +- `undefined`, when a name should be generated by CloudFormation +- a concrete name generated automatically during synthesis, in + cross-environment scenarios. + +#### Inherited from + +[`DataSourceBase`](DataSourceBase.md).[`physicalName`](DataSourceBase.md#physicalname) + +*** + +### stack + +> `readonly` **stack**: `Stack` + +The stack in which this resource is defined. 
+ +#### Inherited from + +[`DataSourceBase`](DataSourceBase.md).[`stack`](DataSourceBase.md#stack) + +## Methods + +### \_enableCrossEnvironment() + +> **\_enableCrossEnvironment**(): `void` + +**`Internal`** + +Called when this resource is referenced across environments +(account/region) in order to request that a physical name will be generated +for this resource during synthesis, so the resource can be referenced +through its absolute name/arn. + +#### Returns + +`void` + +#### Inherited from + +[`DataSourceBase`](DataSourceBase.md).[`_enableCrossEnvironment`](DataSourceBase.md#_enablecrossenvironment) + +*** + +### applyRemovalPolicy() + +> **applyRemovalPolicy**(`policy`): `void` + +Apply the given removal policy to this resource + +The Removal Policy controls what happens to this resource when it stops +being managed by CloudFormation, either because you've removed it from the +CDK application or because you've made a change that requires the resource +to be replaced. + +The resource can be deleted (`RemovalPolicy.DESTROY`), or left in your AWS +account for data recovery and cleanup later (`RemovalPolicy.RETAIN`). + +#### Parameters + +• **policy**: `RemovalPolicy` + +#### Returns + +`void` + +#### Inherited from + +[`DataSourceBase`](DataSourceBase.md).[`applyRemovalPolicy`](DataSourceBase.md#applyremovalpolicy) + +*** + +### generatePhysicalName() + +> `protected` **generatePhysicalName**(): `string` + +#### Returns + +`string` + +#### Inherited from + +[`DataSourceBase`](DataSourceBase.md).[`generatePhysicalName`](DataSourceBase.md#generatephysicalname) + +*** + +### getResourceArnAttribute() + +> `protected` **getResourceArnAttribute**(`arnAttr`, `arnComponents`): `string` + +Returns an environment-sensitive token that should be used for the +resource's "ARN" attribute (e.g. `bucket.bucketArn`).
+ +Normally, this token will resolve to `arnAttr`, but if the resource is +referenced across environments, `arnComponents` will be used to synthesize +a concrete ARN with the resource's physical name. Make sure to reference +`this.physicalName` in `arnComponents`. + +#### Parameters + +• **arnAttr**: `string` + +The CFN attribute which resolves to the ARN of the resource. +Commonly it will be called "Arn" (e.g. `resource.attrArn`), but sometimes +it's the CFN resource's `ref`. + +• **arnComponents**: `ArnComponents` + +The format of the ARN of this resource. You must +reference `this.physicalName` somewhere within the ARN in order for +cross-environment references to work. + +#### Returns + +`string` + +#### Inherited from + +[`DataSourceBase`](DataSourceBase.md).[`getResourceArnAttribute`](DataSourceBase.md#getresourcearnattribute) + +*** + +### getResourceNameAttribute() + +> `protected` **getResourceNameAttribute**(`nameAttr`): `string` + +Returns an environment-sensitive token that should be used for the +resource's "name" attribute (e.g. `bucket.bucketName`). + +Normally, this token will resolve to `nameAttr`, but if the resource is +referenced across environments, it will be resolved to `this.physicalName`, +which will be a concrete name. + +#### Parameters + +• **nameAttr**: `string` + +The CFN attribute which resolves to the resource's name. +Commonly this is the resource's `ref`. + +#### Returns + +`string` + +#### Inherited from + +[`DataSourceBase`](DataSourceBase.md).[`getResourceNameAttribute`](DataSourceBase.md#getresourcenameattribute) + +*** + +### toString() + +> **toString**(): `string` + +Returns a string representation of this construct. 
+ +#### Returns + +`string` + +#### Inherited from + +[`DataSourceBase`](DataSourceBase.md).[`toString`](DataSourceBase.md#tostring) + +*** + +### fromDataSourceId() + +> `static` **fromDataSourceId**(`scope`, `id`, `dataSourceId`): [`IDataSource`](../interfaces/IDataSource.md) + +#### Parameters + +• **scope**: `Construct` + +• **id**: `string` + +• **dataSourceId**: `string` + +#### Returns + +[`IDataSource`](../interfaces/IDataSource.md) + +*** + +### isConstruct() + +> `static` **isConstruct**(`x`): `x is Construct` + +Checks if `x` is a construct. + +Use this method instead of `instanceof` to properly detect `Construct` +instances, even when the construct library is symlinked. + +Explanation: in JavaScript, multiple copies of the `constructs` library on +disk are seen as independent, completely different libraries. As a +consequence, the class `Construct` in each copy of the `constructs` library +is seen as a different class, and an instance of one class will not test as +`instanceof` the other class. `npm install` will not create installations +like this, but users may manually symlink construct libraries together or +use a monorepo tool: in those cases, multiple copies of the `constructs` +library can be accidentally installed, and `instanceof` will behave +unpredictably. It is safest to avoid using `instanceof`, and use +this type-testing method instead. + +#### Parameters + +• **x**: `any` + +Any object + +#### Returns + +`x is Construct` + +true if `x` is an object created from a class which extends `Construct`.
+ +#### Inherited from + +[`DataSourceBase`](DataSourceBase.md).[`isConstruct`](DataSourceBase.md#isconstruct) + +*** + +### isOwnedResource() + +> `static` **isOwnedResource**(`construct`): `boolean` + +Returns true if the construct was created by CDK, and false otherwise + +#### Parameters + +• **construct**: `IConstruct` + +#### Returns + +`boolean` + +#### Inherited from + +[`DataSourceBase`](DataSourceBase.md).[`isOwnedResource`](DataSourceBase.md#isownedresource) + +*** + +### isResource() + +> `static` **isResource**(`construct`): `construct is Resource` + +Check whether the given construct is a Resource + +#### Parameters + +• **construct**: `IConstruct` + +#### Returns + +`construct is Resource` + +#### Inherited from + +[`DataSourceBase`](DataSourceBase.md).[`isResource`](DataSourceBase.md#isresource) diff --git a/apidocs/namespaces/bedrock/classes/DataSourceBase.md b/apidocs/namespaces/bedrock/classes/DataSourceBase.md new file mode 100644 index 00000000..4967b367 --- /dev/null +++ b/apidocs/namespaces/bedrock/classes/DataSourceBase.md @@ -0,0 +1,360 @@ +[**@cdklabs/generative-ai-cdk-constructs**](../../../README.md) • **Docs** + +*** + +[@cdklabs/generative-ai-cdk-constructs](../../../README.md) / [bedrock](../README.md) / DataSourceBase + +# Class: `abstract` DataSourceBase + +Specifies the base class for all data source resources (imported and new). 
+ +## Extends + +- `Resource` + +## Extended by + +- [`DataSourceNew`](DataSourceNew.md) +- [`DataSource`](DataSource.md) + +## Implements + +- [`IDataSource`](../interfaces/IDataSource.md) + +## Constructors + +### new DataSourceBase() + +> **new DataSourceBase**(`scope`, `id`, `props`?): [`DataSourceBase`](DataSourceBase.md) + +#### Parameters + +• **scope**: `Construct` + +• **id**: `string` + +• **props?**: `ResourceProps` + +#### Returns + +[`DataSourceBase`](DataSourceBase.md) + +#### Inherited from + +`Resource.constructor` + +## Properties + +### dataSourceId + +> `abstract` `readonly` **dataSourceId**: `string` + +The unique identifier of the data source. + +#### Example + +```ts +'JHUEVXUZMU' +``` + +#### Implementation of + +[`IDataSource`](../interfaces/IDataSource.md).[`dataSourceId`](../interfaces/IDataSource.md#datasourceid) + +*** + +### env + +> `readonly` **env**: `ResourceEnvironment` + +The environment this resource belongs to. +For resources that are created and managed by the CDK +(generally, those created by creating new class instances like Role, Bucket, etc.), +this is always the same as the environment of the stack they belong to; +however, for imported resources +(those obtained from static methods like fromRoleArn, fromBucketName, etc.), +that might be different than the stack they were imported into. + +#### Implementation of + +[`IDataSource`](../interfaces/IDataSource.md).[`env`](../interfaces/IDataSource.md#env) + +#### Inherited from + +`Resource.env` + +*** + +### node + +> `readonly` **node**: `Node` + +The tree node. + +#### Implementation of + +[`IDataSource`](../interfaces/IDataSource.md).[`node`](../interfaces/IDataSource.md#node) + +#### Inherited from + +`Resource.node` + +*** + +### physicalName + +> `protected` `readonly` **physicalName**: `string` + +Returns a string-encoded token that resolves to the physical name that +should be passed to the CloudFormation resource. 
+ +This value will resolve to one of the following: +- a concrete value (e.g. `"my-awesome-bucket"`) +- `undefined`, when a name should be generated by CloudFormation +- a concrete name generated automatically during synthesis, in + cross-environment scenarios. + +#### Inherited from + +`Resource.physicalName` + +*** + +### stack + +> `readonly` **stack**: `Stack` + +The stack in which this resource is defined. + +#### Implementation of + +[`IDataSource`](../interfaces/IDataSource.md).[`stack`](../interfaces/IDataSource.md#stack) + +#### Inherited from + +`Resource.stack` + +## Methods + +### \_enableCrossEnvironment() + +> **\_enableCrossEnvironment**(): `void` + +**`Internal`** + +Called when this resource is referenced across environments +(account/region) in order to request that a physical name will be generated +for this resource during synthesis, so the resource can be referenced +through its absolute name/arn. + +#### Returns + +`void` + +#### Inherited from + +`Resource._enableCrossEnvironment` + +*** + +### applyRemovalPolicy() + +> **applyRemovalPolicy**(`policy`): `void` + +Apply the given removal policy to this resource + +The Removal Policy controls what happens to this resource when it stops +being managed by CloudFormation, either because you've removed it from the +CDK application or because you've made a change that requires the resource +to be replaced. + +The resource can be deleted (`RemovalPolicy.DESTROY`), or left in your AWS +account for data recovery and cleanup later (`RemovalPolicy.RETAIN`).
+ +#### Parameters + +• **policy**: `RemovalPolicy` + +#### Returns + +`void` + +#### Implementation of + +[`IDataSource`](../interfaces/IDataSource.md).[`applyRemovalPolicy`](../interfaces/IDataSource.md#applyremovalpolicy) + +#### Inherited from + +`Resource.applyRemovalPolicy` + +*** + +### generatePhysicalName() + +> `protected` **generatePhysicalName**(): `string` + +#### Returns + +`string` + +#### Inherited from + +`Resource.generatePhysicalName` + +*** + +### getResourceArnAttribute() + +> `protected` **getResourceArnAttribute**(`arnAttr`, `arnComponents`): `string` + +Returns an environment-sensitive token that should be used for the +resource's "ARN" attribute (e.g. `bucket.bucketArn`). + +Normally, this token will resolve to `arnAttr`, but if the resource is +referenced across environments, `arnComponents` will be used to synthesize +a concrete ARN with the resource's physical name. Make sure to reference +`this.physicalName` in `arnComponents`. + +#### Parameters + +• **arnAttr**: `string` + +The CFN attribute which resolves to the ARN of the resource. +Commonly it will be called "Arn" (e.g. `resource.attrArn`), but sometimes +it's the CFN resource's `ref`. + +• **arnComponents**: `ArnComponents` + +The format of the ARN of this resource. You must +reference `this.physicalName` somewhere within the ARN in order for +cross-environment references to work. + +#### Returns + +`string` + +#### Inherited from + +`Resource.getResourceArnAttribute` + +*** + +### getResourceNameAttribute() + +> `protected` **getResourceNameAttribute**(`nameAttr`): `string` + +Returns an environment-sensitive token that should be used for the +resource's "name" attribute (e.g. `bucket.bucketName`). + +Normally, this token will resolve to `nameAttr`, but if the resource is +referenced across environments, it will be resolved to `this.physicalName`, +which will be a concrete name. 
+ +#### Parameters + +• **nameAttr**: `string` + +The CFN attribute which resolves to the resource's name. +Commonly this is the resource's `ref`. + +#### Returns + +`string` + +#### Inherited from + +`Resource.getResourceNameAttribute` + +*** + +### toString() + +> **toString**(): `string` + +Returns a string representation of this construct. + +#### Returns + +`string` + +#### Inherited from + +`Resource.toString` + +*** + +### isConstruct() + +> `static` **isConstruct**(`x`): `x is Construct` + +Checks if `x` is a construct. + +Use this method instead of `instanceof` to properly detect `Construct` +instances, even when the construct library is symlinked. + +Explanation: in JavaScript, multiple copies of the `constructs` library on +disk are seen as independent, completely different libraries. As a +consequence, the class `Construct` in each copy of the `constructs` library +is seen as a different class, and an instance of one class will not test as +`instanceof` the other class. `npm install` will not create installations +like this, but users may manually symlink construct libraries together or +use a monorepo tool: in those cases, multiple copies of the `constructs` +library can be accidentally installed, and `instanceof` will behave +unpredictably. It is safest to avoid using `instanceof`, and use +this type-testing method instead. + +#### Parameters + +• **x**: `any` + +Any object + +#### Returns + +`x is Construct` + +true if `x` is an object created from a class which extends `Construct`.
+ +#### Inherited from + +`Resource.isConstruct` + +*** + +### isOwnedResource() + +> `static` **isOwnedResource**(`construct`): `boolean` + +Returns true if the construct was created by CDK, and false otherwise + +#### Parameters + +• **construct**: `IConstruct` + +#### Returns + +`boolean` + +#### Inherited from + +`Resource.isOwnedResource` + +*** + +### isResource() + +> `static` **isResource**(`construct`): `construct is Resource` + +Check whether the given construct is a Resource + +#### Parameters + +• **construct**: `IConstruct` + +#### Returns + +`construct is Resource` + +#### Inherited from + +`Resource.isResource` diff --git a/apidocs/namespaces/bedrock/classes/DataSourceNew.md b/apidocs/namespaces/bedrock/classes/DataSourceNew.md new file mode 100644 index 00000000..b333d5b7 --- /dev/null +++ b/apidocs/namespaces/bedrock/classes/DataSourceNew.md @@ -0,0 +1,409 @@ +[**@cdklabs/generative-ai-cdk-constructs**](../../../README.md) • **Docs** + +*** + +[@cdklabs/generative-ai-cdk-constructs](../../../README.md) / [bedrock](../README.md) / DataSourceNew + +# Class: `abstract` DataSourceNew + +Specifies the base class for all NEW data source resources of ANY type. 
+ +## Extends + +- [`DataSourceBase`](DataSourceBase.md) + +## Extended by + +- [`WebCrawlerDataSource`](WebCrawlerDataSource.md) +- [`SharePointDataSource`](SharePointDataSource.md) +- [`ConfluenceDataSource`](ConfluenceDataSource.md) +- [`SalesforceDataSource`](SalesforceDataSource.md) +- [`S3DataSource`](S3DataSource.md) + +## Constructors + +### new DataSourceNew() + +> **new DataSourceNew**(`scope`, `id`, `props`?): [`DataSourceNew`](DataSourceNew.md) + +#### Parameters + +• **scope**: `Construct` + +• **id**: `string` + +• **props?**: `ResourceProps` + +#### Returns + +[`DataSourceNew`](DataSourceNew.md) + +#### Inherited from + +[`DataSourceBase`](DataSourceBase.md).[`constructor`](DataSourceBase.md#constructors) + +## Properties + +### dataSourceId + +> `abstract` `readonly` **dataSourceId**: `string` + +The unique identifier of the data source. + +#### Example + +```ts +'JHUEVXUZMU' +``` + +#### Overrides + +[`DataSourceBase`](DataSourceBase.md).[`dataSourceId`](DataSourceBase.md#datasourceid) + +*** + +### dataSourceName + +> `abstract` `readonly` **dataSourceName**: `string` + +The name of the data source. + +*** + +### dataSourceType + +> `abstract` `readonly` **dataSourceType**: [`DataSourceType`](../enumerations/DataSourceType.md) + +The type of data source. + +*** + +### env + +> `readonly` **env**: `ResourceEnvironment` + +The environment this resource belongs to. +For resources that are created and managed by the CDK +(generally, those created by creating new class instances like Role, Bucket, etc.), +this is always the same as the environment of the stack they belong to; +however, for imported resources +(those obtained from static methods like fromRoleArn, fromBucketName, etc.), +that might be different than the stack they were imported into. + +#### Inherited from + +[`DataSourceBase`](DataSourceBase.md).[`env`](DataSourceBase.md#env) + +*** + +### kmsKey? 
+ +> `abstract` `readonly` `optional` **kmsKey**: `IKey` + +The KMS key to use to encrypt the data source. + +*** + +### knowledgeBase + +> `abstract` `readonly` **knowledgeBase**: [`IKnowledgeBase`](../interfaces/IKnowledgeBase.md) + +The knowledge base associated with the data source. + +*** + +### node + +> `readonly` **node**: `Node` + +The tree node. + +#### Inherited from + +[`DataSourceBase`](DataSourceBase.md).[`node`](DataSourceBase.md#node) + +*** + +### physicalName + +> `protected` `readonly` **physicalName**: `string` + +Returns a string-encoded token that resolves to the physical name that +should be passed to the CloudFormation resource. + +This value will resolve to one of the following: +- a concrete value (e.g. `"my-awesome-bucket"`) +- `undefined`, when a name should be generated by CloudFormation +- a concrete name generated automatically during synthesis, in + cross-environment scenarios. + +#### Inherited from + +[`DataSourceBase`](DataSourceBase.md).[`physicalName`](DataSourceBase.md#physicalname) + +*** + +### stack + +> `readonly` **stack**: `Stack` + +The stack in which this resource is defined. + +#### Inherited from + +[`DataSourceBase`](DataSourceBase.md).[`stack`](DataSourceBase.md#stack) + +## Methods + +### \_enableCrossEnvironment() + +> **\_enableCrossEnvironment**(): `void` + +**`Internal`** + +Called when this resource is referenced across environments +(account/region) in order to request that a physical name will be generated +for this resource during synthesis, so the resource can be referenced +through its absolute name/arn.
+ +#### Returns + +`void` + +#### Inherited from + +[`DataSourceBase`](DataSourceBase.md).[`_enableCrossEnvironment`](DataSourceBase.md#_enablecrossenvironment) + +*** + +### applyRemovalPolicy() + +> **applyRemovalPolicy**(`policy`): `void` + +Apply the given removal policy to this resource + +The Removal Policy controls what happens to this resource when it stops +being managed by CloudFormation, either because you've removed it from the +CDK application or because you've made a change that requires the resource +to be replaced. + +The resource can be deleted (`RemovalPolicy.DESTROY`), or left in your AWS +account for data recovery and cleanup later (`RemovalPolicy.RETAIN`). + +#### Parameters + +• **policy**: `RemovalPolicy` + +#### Returns + +`void` + +#### Inherited from + +[`DataSourceBase`](DataSourceBase.md).[`applyRemovalPolicy`](DataSourceBase.md#applyremovalpolicy) + +*** + +### formatAsCfnProps() + +> **formatAsCfnProps**(`props`, `dataSourceConfiguration`): `CfnDataSourceProps` + +Formats the data source configuration properties for CloudFormation. + +#### Parameters + +• **props**: [`DataSourceAssociationProps`](../interfaces/DataSourceAssociationProps.md) + +• **dataSourceConfiguration**: `DataSourceConfigurationProperty` + +#### Returns + +`CfnDataSourceProps` + +*** + +### generatePhysicalName() + +> `protected` **generatePhysicalName**(): `string` + +#### Returns + +`string` + +#### Inherited from + +[`DataSourceBase`](DataSourceBase.md).[`generatePhysicalName`](DataSourceBase.md#generatephysicalname) + +*** + +### getResourceArnAttribute() + +> `protected` **getResourceArnAttribute**(`arnAttr`, `arnComponents`): `string` + +Returns an environment-sensitive token that should be used for the +resource's "ARN" attribute (e.g. `bucket.bucketArn`). + +Normally, this token will resolve to `arnAttr`, but if the resource is +referenced across environments, `arnComponents` will be used to synthesize +a concrete ARN with the resource's physical name. 
Make sure to reference +`this.physicalName` in `arnComponents`. + +#### Parameters + +• **arnAttr**: `string` + +The CFN attribute which resolves to the ARN of the resource. +Commonly it will be called "Arn" (e.g. `resource.attrArn`), but sometimes +it's the CFN resource's `ref`. + +• **arnComponents**: `ArnComponents` + +The format of the ARN of this resource. You must +reference `this.physicalName` somewhere within the ARN in order for +cross-environment references to work. + +#### Returns + +`string` + +#### Inherited from + +[`DataSourceBase`](DataSourceBase.md).[`getResourceArnAttribute`](DataSourceBase.md#getresourcearnattribute) + +*** + +### getResourceNameAttribute() + +> `protected` **getResourceNameAttribute**(`nameAttr`): `string` + +Returns an environment-sensitive token that should be used for the +resource's "name" attribute (e.g. `bucket.bucketName`). + +Normally, this token will resolve to `nameAttr`, but if the resource is +referenced across environments, it will be resolved to `this.physicalName`, +which will be a concrete name. + +#### Parameters + +• **nameAttr**: `string` + +The CFN attribute which resolves to the resource's name. +Commonly this is the resource's `ref`. + +#### Returns + +`string` + +#### Inherited from + +[`DataSourceBase`](DataSourceBase.md).[`getResourceNameAttribute`](DataSourceBase.md#getresourcenameattribute) + +*** + +### handleCommonPermissions() + +> **handleCommonPermissions**(`props`): `void` + +Adds appropriate permissions to the KB execution role needed by the data source. + +#### Parameters + +• **props**: [`DataSourceAssociationProps`](../interfaces/DataSourceAssociationProps.md) + +#### Returns + +`void` + +*** + +### toString() + +> **toString**(): `string` + +Returns a string representation of this construct. 
+ +#### Returns + +`string` + +#### Inherited from + +[`DataSourceBase`](DataSourceBase.md).[`toString`](DataSourceBase.md#tostring) + +*** + +### isConstruct() + +> `static` **isConstruct**(`x`): `x is Construct` + +Checks if `x` is a construct. + +Use this method instead of `instanceof` to properly detect `Construct` +instances, even when the construct library is symlinked. + +Explanation: in JavaScript, multiple copies of the `constructs` library on +disk are seen as independent, completely different libraries. As a +consequence, the class `Construct` in each copy of the `constructs` library +is seen as a different class, and an instance of one class will not test as +`instanceof` the other class. `npm install` will not create installations +like this, but users may manually symlink construct libraries together or +use a monorepo tool: in those cases, multiple copies of the `constructs` +library can be accidentally installed, and `instanceof` will behave +unpredictably. It is safest to avoid using `instanceof`, and use +this type-testing method instead. + +#### Parameters + +• **x**: `any` + +Any object + +#### Returns + +`x is Construct` + +true if `x` is an object created from a class which extends `Construct`.
+ +#### Inherited from + +[`DataSourceBase`](DataSourceBase.md).[`isConstruct`](DataSourceBase.md#isconstruct) + +*** + +### isOwnedResource() + +> `static` **isOwnedResource**(`construct`): `boolean` + +Returns true if the construct was created by CDK, and false otherwise + +#### Parameters + +• **construct**: `IConstruct` + +#### Returns + +`boolean` + +#### Inherited from + +[`DataSourceBase`](DataSourceBase.md).[`isOwnedResource`](DataSourceBase.md#isownedresource) + +*** + +### isResource() + +> `static` **isResource**(`construct`): `construct is Resource` + +Check whether the given construct is a Resource + +#### Parameters + +• **construct**: `IConstruct` + +#### Returns + +`construct is Resource` + +#### Inherited from + +[`DataSourceBase`](DataSourceBase.md).[`isResource`](DataSourceBase.md#isresource) diff --git a/apidocs/namespaces/bedrock/classes/KnowledgeBase.md b/apidocs/namespaces/bedrock/classes/KnowledgeBase.md index 5a6ea3ac..c410fa8b 100644 --- a/apidocs/namespaces/bedrock/classes/KnowledgeBase.md +++ b/apidocs/namespaces/bedrock/classes/KnowledgeBase.md @@ -11,7 +11,7 @@ Pinecone, Redis Enterprise Cloud or Amazon Aurora PostgreSQL. ## Extends -- `Construct` +- `KnowledgeBaseBase` ## Constructors @@ -33,7 +33,7 @@ Pinecone, Redis Enterprise Cloud or Amazon Aurora PostgreSQL. #### Overrides -`Construct.constructor` +`KnowledgeBaseBase.constructor` ## Properties @@ -45,6 +45,24 @@ The description knowledge base. *** +### env + +> `readonly` **env**: `ResourceEnvironment` + +The environment this resource belongs to. +For resources that are created and managed by the CDK +(generally, those created by creating new class instances like Role, Bucket, etc.), +this is always the same as the environment of the stack they belong to; +however, for imported resources +(those obtained from static methods like fromRoleArn, fromBucketName, etc.), +that might be different than the stack they were imported into. 
+ +#### Inherited from + +`KnowledgeBaseBase.env` + +*** + ### instruction? > `readonly` `optional` **instruction**: `string` @@ -59,6 +77,10 @@ A narrative instruction of the knowledge base. The ARN of the knowledge base. +#### Overrides + +`KnowledgeBaseBase.knowledgeBaseArn` + *** ### knowledgeBaseId @@ -67,6 +89,10 @@ The ARN of the knowledge base. The ID of the knowledge base. +#### Overrides + +`KnowledgeBaseBase.knowledgeBaseId` + *** ### knowledgeBaseInstance @@ -101,16 +127,51 @@ The tree node. #### Inherited from -`Construct.node` +`KnowledgeBaseBase.node` + +*** + +### physicalName + +> `protected` `readonly` **physicalName**: `string` + +Returns a string-encoded token that resolves to the physical name that +should be passed to the CloudFormation resource. + +This value will resolve to one of the following: +- a concrete value (e.g. `"my-awesome-bucket"`) +- `undefined`, when a name should be generated by CloudFormation +- a concrete name generated automatically during synthesis, in + cross-environment scenarios. + +#### Inherited from + +`KnowledgeBaseBase.physicalName` *** ### role -> `readonly` **role**: `Role` +> `readonly` **role**: `IRole` The role the Knowledge Base uses to access the vector store and data source. +#### Overrides + +`KnowledgeBaseBase.role` + +*** + +### stack + +> `readonly` **stack**: `Stack` + +The stack in which this resource is defined. + +#### Inherited from + +`KnowledgeBaseBase.stack` + *** ### vectorStore @@ -121,6 +182,155 @@ The vector store for the knowledge base. ## Methods +### \_enableCrossEnvironment() + +> **\_enableCrossEnvironment**(): `void` + +**`Internal`** + +Called when this resource is referenced across environments +(account/region) in order to request that a physical name will be generated +for this resource during synthesis, so the resource can be referenced +through its absolute name/arn.
+ +#### Returns + +`void` + +#### Inherited from + +`KnowledgeBaseBase._enableCrossEnvironment` + +*** + +### addConfluenceDataSource() + +> **addConfluenceDataSource**(`props`): [`ConfluenceDataSource`](ConfluenceDataSource.md) + +Add a Confluence data source to the knowledge base. + +#### Parameters + +• **props**: [`ConfluenceDataSourceAssociationProps`](../interfaces/ConfluenceDataSourceAssociationProps.md) + +#### Returns + +[`ConfluenceDataSource`](ConfluenceDataSource.md) + +#### Inherited from + +`KnowledgeBaseBase.addConfluenceDataSource` + +*** + +### addS3DataSource() + +> **addS3DataSource**(`props`): [`S3DataSource`](S3DataSource.md) + +Add an S3 data source to the knowledge base. + +#### Parameters + +• **props**: [`S3DataSourceAssociationProps`](../interfaces/S3DataSourceAssociationProps.md) + +#### Returns + +[`S3DataSource`](S3DataSource.md) + +#### Inherited from + +`KnowledgeBaseBase.addS3DataSource` + +*** + +### addSalesforceDataSource() + +> **addSalesforceDataSource**(`props`): [`SalesforceDataSource`](SalesforceDataSource.md) + +Add a Salesforce data source to the knowledge base. + +#### Parameters + +• **props**: [`SalesforceDataSourceAssociationProps`](../interfaces/SalesforceDataSourceAssociationProps.md) + +#### Returns + +[`SalesforceDataSource`](SalesforceDataSource.md) + +#### Inherited from + +`KnowledgeBaseBase.addSalesforceDataSource` + +*** + +### addSharePointDataSource() + +> **addSharePointDataSource**(`props`): [`SharePointDataSource`](SharePointDataSource.md) + +Add a SharePoint data source to the knowledge base. 
+ +#### Parameters + +• **props**: [`SharePointDataSourceAssociationProps`](../interfaces/SharepointDataSourceAssociationProps.md) + +#### Returns + +[`SharePointDataSource`](SharepointDataSource.md) + +#### Inherited from + +`KnowledgeBaseBase.addSharePointDataSource` + +*** + +### addWebCrawlerDataSource() + +> **addWebCrawlerDataSource**(`props`): [`WebCrawlerDataSource`](WebCrawlerDataSource.md) + +Add a web crawler data source to the knowledge base. + +#### Parameters + +• **props**: [`WebCrawlerDataSourceAssociationProps`](../interfaces/WebCrawlerDataSourceAssociationProps.md) + +#### Returns + +[`WebCrawlerDataSource`](WebCrawlerDataSource.md) + +#### Inherited from + +`KnowledgeBaseBase.addWebCrawlerDataSource` + +*** + +### applyRemovalPolicy() + +> **applyRemovalPolicy**(`policy`): `void` + +Apply the given removal policy to this resource + +The Removal Policy controls what happens to this resource when it stops +being managed by CloudFormation, either because you've removed it from the +CDK application or because you've made a change that requires the resource +to be replaced. + +The resource can be deleted (`RemovalPolicy.DESTROY`), or left in your AWS +account for data recovery and cleanup later (`RemovalPolicy.RETAIN`). + +#### Parameters + +• **policy**: `RemovalPolicy` + +#### Returns + +`void` + +#### Inherited from + +`KnowledgeBaseBase.applyRemovalPolicy` + +*** + ### associateToAgent() > **associateToAgent**(`agent`): `void` @@ -137,6 +347,84 @@ Associate knowledge base with an agent *** +### generatePhysicalName() + +> `protected` **generatePhysicalName**(): `string` + +#### Returns + +`string` + +#### Inherited from + +`KnowledgeBaseBase.generatePhysicalName` + +*** + +### getResourceArnAttribute() + +> `protected` **getResourceArnAttribute**(`arnAttr`, `arnComponents`): `string` + +Returns an environment-sensitive token that should be used for the +resource's "ARN" attribute (e.g. `bucket.bucketArn`).
+ +Normally, this token will resolve to `arnAttr`, but if the resource is +referenced across environments, `arnComponents` will be used to synthesize +a concrete ARN with the resource's physical name. Make sure to reference +`this.physicalName` in `arnComponents`. + +#### Parameters + +• **arnAttr**: `string` + +The CFN attribute which resolves to the ARN of the resource. +Commonly it will be called "Arn" (e.g. `resource.attrArn`), but sometimes +it's the CFN resource's `ref`. + +• **arnComponents**: `ArnComponents` + +The format of the ARN of this resource. You must +reference `this.physicalName` somewhere within the ARN in order for +cross-environment references to work. + +#### Returns + +`string` + +#### Inherited from + +`KnowledgeBaseBase.getResourceArnAttribute` + +*** + +### getResourceNameAttribute() + +> `protected` **getResourceNameAttribute**(`nameAttr`): `string` + +Returns an environment-sensitive token that should be used for the +resource's "name" attribute (e.g. `bucket.bucketName`). + +Normally, this token will resolve to `nameAttr`, but if the resource is +referenced across environments, it will be resolved to `this.physicalName`, +which will be a concrete name. + +#### Parameters + +• **nameAttr**: `string` + +The CFN attribute which resolves to the resource's name. +Commonly this is the resource's `ref`. + +#### Returns + +`string` + +#### Inherited from + +`KnowledgeBaseBase.getResourceNameAttribute` + +*** + ### toString() > **toString**(): `string` @@ -149,7 +437,25 @@ Returns a string representation of this construct. 
#### Inherited from -`Construct.toString` +`KnowledgeBaseBase.toString` + +*** + +### fromKnowledgeBaseAttributes() + +> `static` **fromKnowledgeBaseAttributes**(`scope`, `id`, `attrs`): [`IKnowledgeBase`](../interfaces/IKnowledgeBase.md) + +#### Parameters + +• **scope**: `Construct` + +• **id**: `string` + +• **attrs**: [`KnowledgeBaseAttributes`](../interfaces/KnowledgeBaseAttributes.md) + +#### Returns + +[`IKnowledgeBase`](../interfaces/IKnowledgeBase.md) *** @@ -187,4 +493,44 @@ true if `x` is an object created from a class which extends `Construct`. #### Inherited from -`Construct.isConstruct` +`KnowledgeBaseBase.isConstruct` + +*** + +### isOwnedResource() + +> `static` **isOwnedResource**(`construct`): `boolean` + +Returns true if the construct was created by CDK, and false otherwise + +#### Parameters + +• **construct**: `IConstruct` + +#### Returns + +`boolean` + +#### Inherited from + +`KnowledgeBaseBase.isOwnedResource` + +*** + +### isResource() + +> `static` **isResource**(`construct`): `construct is Resource` + +Check whether the given construct is a Resource + +#### Parameters + +• **construct**: `IConstruct` + +#### Returns + +`construct is Resource` + +#### Inherited from + +`KnowledgeBaseBase.isResource` diff --git a/apidocs/namespaces/bedrock/classes/ParsingStategy.md b/apidocs/namespaces/bedrock/classes/ParsingStategy.md new file mode 100644 index 00000000..119a4b06 --- /dev/null +++ b/apidocs/namespaces/bedrock/classes/ParsingStategy.md @@ -0,0 +1,64 @@ +[**@cdklabs/generative-ai-cdk-constructs**](../../../README.md) • **Docs** + +*** + +[@cdklabs/generative-ai-cdk-constructs](../../../README.md) / [bedrock](../README.md) / ParsingStategy + +# Class: `abstract` ParsingStategy + +Represents an advanced parsing strategy configuration for Knowledge Base ingestion. 
+ +## See + +https://docs.aws.amazon.com/bedrock/latest/userguide/kb-chunking-parsing.html#kb-advanced-parsing + +## Constructors + +### new ParsingStategy() + +> **new ParsingStategy**(): [`ParsingStategy`](ParsingStategy.md) + +#### Returns + +[`ParsingStategy`](ParsingStategy.md) + +## Properties + +### configuration + +> `abstract` **configuration**: `ParsingConfigurationProperty` + +The CloudFormation property representation of this configuration + +## Methods + +### generatePolicyStatements() + +> `abstract` **generatePolicyStatements**(): `PolicyStatement`[] + +#### Returns + +`PolicyStatement`[] + +*** + +### foundationModel() + +> `static` **foundationModel**(`props`): [`ParsingStategy`](ParsingStategy.md) + +Creates a Foundation Model-based parsing strategy for extracting non-textual information +from documents such as tables and charts. +- Additional costs apply when using advanced parsing due to foundation model usage. +- There are limits on file types (PDF) and total data that can be parsed using advanced parsing. + +#### Parameters + +• **props**: [`FoundationModelParsingStategyProps`](../interfaces/FoundationModelParsingStategyProps.md) + +#### Returns + +[`ParsingStategy`](ParsingStategy.md) + +#### See + +https://docs.aws.amazon.com/bedrock/latest/userguide/knowledge-base-ds.html#kb-ds-supported-doc-formats-limits diff --git a/apidocs/namespaces/bedrock/classes/S3DataSource.md b/apidocs/namespaces/bedrock/classes/S3DataSource.md index c6eb74d8..e8cb9a73 100644 --- a/apidocs/namespaces/bedrock/classes/S3DataSource.md +++ b/apidocs/namespaces/bedrock/classes/S3DataSource.md @@ -6,11 +6,11 @@ # Class: S3DataSource -Sets up a data source to be added to a knowledge base. +Sets up an S3 Data Source to be added to a knowledge base. ## Extends -- `Construct` +- [`DataSourceNew`](DataSourceNew.md) ## Constructors @@ -32,15 +32,15 @@ Sets up a data source to be added to a knowledge base. 
#### Overrides -`Construct.constructor` +[`DataSourceNew`](DataSourceNew.md).[`constructor`](DataSourceNew.md#constructors) ## Properties -### dataSource +### bucket -> `readonly` **dataSource**: `CfnDataSource` +> `readonly` **bucket**: `IBucket` -The Data Source cfn resource. +The bucket associated with the data source. *** @@ -50,6 +50,82 @@ The Data Source cfn resource. The unique identifier of the data source. +#### Example + +```ts +'JHUEVXUZMU' +``` + +#### Overrides + +[`DataSourceNew`](DataSourceNew.md).[`dataSourceId`](DataSourceNew.md#datasourceid) + +*** + +### dataSourceName + +> `readonly` **dataSourceName**: `string` + +The name of the data source. + +#### Overrides + +[`DataSourceNew`](DataSourceNew.md).[`dataSourceName`](DataSourceNew.md#datasourcename) + +*** + +### dataSourceType + +> `readonly` **dataSourceType**: [`DataSourceType`](../enumerations/DataSourceType.md) + +The type of data source. + +#### Overrides + +[`DataSourceNew`](DataSourceNew.md).[`dataSourceType`](DataSourceNew.md#datasourcetype) + +*** + +### env + +> `readonly` **env**: `ResourceEnvironment` + +The environment this resource belongs to. +For resources that are created and managed by the CDK +(generally, those created by creating new class instances like Role, Bucket, etc.), +this is always the same as the environment of the stack they belong to; +however, for imported resources +(those obtained from static methods like fromRoleArn, fromBucketName, etc.), +that might be different than the stack they were imported into. + +#### Inherited from + +[`DataSourceNew`](DataSourceNew.md).[`env`](DataSourceNew.md#env) + +*** + +### kmsKey? + +> `readonly` `optional` **kmsKey**: `IKey` + +The KMS key to use to encrypt the data source. 
+ +#### Overrides + +[`DataSourceNew`](DataSourceNew.md).[`kmsKey`](DataSourceNew.md#kmskey) + +*** + +### knowledgeBase + +> `readonly` **knowledgeBase**: [`IKnowledgeBase`](../interfaces/IKnowledgeBase.md) + +The knowledge base associated with the data source. + +#### Overrides + +[`DataSourceNew`](DataSourceNew.md).[`knowledgeBase`](DataSourceNew.md#knowledgebase) + *** ### node @@ -60,10 +136,210 @@ The tree node. #### Inherited from -`Construct.node` +[`DataSourceNew`](DataSourceNew.md).[`node`](DataSourceNew.md#node) + +*** + +### physicalName + +> `protected` `readonly` **physicalName**: `string` + +Returns a string-encoded token that resolves to the physical name that +should be passed to the CloudFormation resource. + +This value will resolve to one of the following: +- a concrete value (e.g. `"my-awesome-bucket"`) +- `undefined`, when a name should be generated by CloudFormation +- a concrete name generated automatically during synthesis, in + cross-environment scenarios. + +#### Inherited from + +[`DataSourceNew`](DataSourceNew.md).[`physicalName`](DataSourceNew.md#physicalname) + +*** + +### stack + +> `readonly` **stack**: `Stack` + +The stack in which this resource is defined. + +#### Inherited from + +[`DataSourceNew`](DataSourceNew.md).[`stack`](DataSourceNew.md#stack) ## Methods +### \_enableCrossEnvironment() + +> **\_enableCrossEnvironment**(): `void` + +**`Internal`** + +Called when this resource is referenced across environments +(account/region) in order to request that a physical name will be generated +for this resource during synthesis, so the resource can be referenced +through its absolute name/arn.
+ +#### Returns + +`void` + +#### Inherited from + +[`DataSourceNew`](DataSourceNew.md).[`_enableCrossEnvironment`](DataSourceNew.md#_enablecrossenvironment) + +*** + +### applyRemovalPolicy() + +> **applyRemovalPolicy**(`policy`): `void` + +Apply the given removal policy to this resource + +The Removal Policy controls what happens to this resource when it stops +being managed by CloudFormation, either because you've removed it from the +CDK application or because you've made a change that requires the resource +to be replaced. + +The resource can be deleted (`RemovalPolicy.DESTROY`), or left in your AWS +account for data recovery and cleanup later (`RemovalPolicy.RETAIN`). + +#### Parameters + +• **policy**: `RemovalPolicy` + +#### Returns + +`void` + +#### Inherited from + +[`DataSourceNew`](DataSourceNew.md).[`applyRemovalPolicy`](DataSourceNew.md#applyremovalpolicy) + +*** + +### formatAsCfnProps() + +> **formatAsCfnProps**(`props`, `dataSourceConfiguration`): `CfnDataSourceProps` + +Formats the data source configuration properties for CloudFormation. + +#### Parameters + +• **props**: [`DataSourceAssociationProps`](../interfaces/DataSourceAssociationProps.md) + +• **dataSourceConfiguration**: `DataSourceConfigurationProperty` + +#### Returns + +`CfnDataSourceProps` + +#### Inherited from + +[`DataSourceNew`](DataSourceNew.md).[`formatAsCfnProps`](DataSourceNew.md#formatascfnprops) + +*** + +### generatePhysicalName() + +> `protected` **generatePhysicalName**(): `string` + +#### Returns + +`string` + +#### Inherited from + +[`DataSourceNew`](DataSourceNew.md).[`generatePhysicalName`](DataSourceNew.md#generatephysicalname) + +*** + +### getResourceArnAttribute() + +> `protected` **getResourceArnAttribute**(`arnAttr`, `arnComponents`): `string` + +Returns an environment-sensitive token that should be used for the +resource's "ARN" attribute (e.g. `bucket.bucketArn`). 
+ +Normally, this token will resolve to `arnAttr`, but if the resource is +referenced across environments, `arnComponents` will be used to synthesize +a concrete ARN with the resource's physical name. Make sure to reference +`this.physicalName` in `arnComponents`. + +#### Parameters + +• **arnAttr**: `string` + +The CFN attribute which resolves to the ARN of the resource. +Commonly it will be called "Arn" (e.g. `resource.attrArn`), but sometimes +it's the CFN resource's `ref`. + +• **arnComponents**: `ArnComponents` + +The format of the ARN of this resource. You must +reference `this.physicalName` somewhere within the ARN in order for +cross-environment references to work. + +#### Returns + +`string` + +#### Inherited from + +[`DataSourceNew`](DataSourceNew.md).[`getResourceArnAttribute`](DataSourceNew.md#getresourcearnattribute) + +*** + +### getResourceNameAttribute() + +> `protected` **getResourceNameAttribute**(`nameAttr`): `string` + +Returns an environment-sensitive token that should be used for the +resource's "name" attribute (e.g. `bucket.bucketName`). + +Normally, this token will resolve to `nameAttr`, but if the resource is +referenced across environments, it will be resolved to `this.physicalName`, +which will be a concrete name. + +#### Parameters + +• **nameAttr**: `string` + +The CFN attribute which resolves to the resource's name. +Commonly this is the resource's `ref`. + +#### Returns + +`string` + +#### Inherited from + +[`DataSourceNew`](DataSourceNew.md).[`getResourceNameAttribute`](DataSourceNew.md#getresourcenameattribute) + +*** + +### handleCommonPermissions() + +> **handleCommonPermissions**(`props`): `void` + +Adds appropriate permissions to the KB execution role needed by the data source. 
+ +#### Parameters + +• **props**: [`DataSourceAssociationProps`](../interfaces/DataSourceAssociationProps.md) + +#### Returns + +`void` + +#### Inherited from + +[`DataSourceNew`](DataSourceNew.md).[`handleCommonPermissions`](DataSourceNew.md#handlecommonpermissions) + +*** + ### toString() > **toString**(): `string` @@ -76,7 +352,7 @@ Returns a string representation of this construct. #### Inherited from -`Construct.toString` +[`DataSourceNew`](DataSourceNew.md).[`toString`](DataSourceNew.md#tostring) *** @@ -114,4 +390,44 @@ true if `x` is an object created from a class which extends `Construct`. #### Inherited from -`Construct.isConstruct` +[`DataSourceNew`](DataSourceNew.md).[`isConstruct`](DataSourceNew.md#isconstruct) + +*** + +### isOwnedResource() + +> `static` **isOwnedResource**(`construct`): `boolean` + +Returns true if the construct was created by CDK, and false otherwise + +#### Parameters + +• **construct**: `IConstruct` + +#### Returns + +`boolean` + +#### Inherited from + +[`DataSourceNew`](DataSourceNew.md).[`isOwnedResource`](DataSourceNew.md#isownedresource) + +*** + +### isResource() + +> `static` **isResource**(`construct`): `construct is Resource` + +Check whether the given construct is a Resource + +#### Parameters + +• **construct**: `IConstruct` + +#### Returns + +`construct is Resource` + +#### Inherited from + +[`DataSourceNew`](DataSourceNew.md).[`isResource`](DataSourceNew.md#isresource) diff --git a/apidocs/namespaces/bedrock/classes/SalesforceDataSource.md b/apidocs/namespaces/bedrock/classes/SalesforceDataSource.md new file mode 100644 index 00000000..99ef94af --- /dev/null +++ b/apidocs/namespaces/bedrock/classes/SalesforceDataSource.md @@ -0,0 +1,441 @@ +[**@cdklabs/generative-ai-cdk-constructs**](../../../README.md) • **Docs** + +*** + +[@cdklabs/generative-ai-cdk-constructs](../../../README.md) / [bedrock](../README.md) / SalesforceDataSource + +# Class: SalesforceDataSource + +Sets up a Salesforce data source to be added to a knowledge
base. + +## Extends + +- [`DataSourceNew`](DataSourceNew.md) + +## Constructors + +### new SalesforceDataSource() + +> **new SalesforceDataSource**(`scope`, `id`, `props`): [`SalesforceDataSource`](SalesforceDataSource.md) + +#### Parameters + +• **scope**: `Construct` + +• **id**: `string` + +• **props**: [`SalesforceDataSourceProps`](../interfaces/SalesforceDataSourceProps.md) + +#### Returns + +[`SalesforceDataSource`](SalesforceDataSource.md) + +#### Overrides + +[`DataSourceNew`](DataSourceNew.md).[`constructor`](DataSourceNew.md#constructors) + +## Properties + +### authSecret + +> `readonly` **authSecret**: `ISecret` + +The AWS Secrets Manager secret that stores your authentication credentials. + +*** + +### dataSourceId + +> `readonly` **dataSourceId**: `string` + +The unique identifier of the data source. + +#### Example + +```ts +'JHUEVXUZMU' +``` + +#### Overrides + +[`DataSourceNew`](DataSourceNew.md).[`dataSourceId`](DataSourceNew.md#datasourceid) + +*** + +### dataSourceName + +> `readonly` **dataSourceName**: `string` + +The name of the data source. + +#### Overrides + +[`DataSourceNew`](DataSourceNew.md).[`dataSourceName`](DataSourceNew.md#datasourcename) + +*** + +### dataSourceType + +> `readonly` **dataSourceType**: [`DataSourceType`](../enumerations/DataSourceType.md) + +The type of data source. + +#### Overrides + +[`DataSourceNew`](DataSourceNew.md).[`dataSourceType`](DataSourceNew.md#datasourcetype) + +*** + +### endpoint + +> `readonly` **endpoint**: `string` + +The Salesforce host URL or instance URL. + +*** + +### env + +> `readonly` **env**: `ResourceEnvironment` + +The environment this resource belongs to. 
+For resources that are created and managed by the CDK +(generally, those created by creating new class instances like Role, Bucket, etc.), +this is always the same as the environment of the stack they belong to; +however, for imported resources +(those obtained from static methods like fromRoleArn, fromBucketName, etc.), +that might be different than the stack they were imported into. + +#### Inherited from + +[`DataSourceNew`](DataSourceNew.md).[`env`](DataSourceNew.md#env) + +*** + +### kmsKey? + +> `readonly` `optional` **kmsKey**: `IKey` + +The KMS key to use to encrypt the data source. + +#### Overrides + +[`DataSourceNew`](DataSourceNew.md).[`kmsKey`](DataSourceNew.md#kmskey) + +*** + +### knowledgeBase + +> `readonly` **knowledgeBase**: [`IKnowledgeBase`](../interfaces/IKnowledgeBase.md) + +The knowledge base associated with the data source. + +#### Overrides + +[`DataSourceNew`](DataSourceNew.md).[`knowledgeBase`](DataSourceNew.md#knowledgebase) + +*** + +### node + +> `readonly` **node**: `Node` + +The tree node. + +#### Inherited from + +[`DataSourceNew`](DataSourceNew.md).[`node`](DataSourceNew.md#node) + +*** + +### physicalName + +> `protected` `readonly` **physicalName**: `string` + +Returns a string-encoded token that resolves to the physical name that +should be passed to the CloudFormation resource. + +This value will resolve to one of the following: +- a concrete value (e.g. `"my-awesome-bucket"`) +- `undefined`, when a name should be generated by CloudFormation +- a concrete name generated automatically during synthesis, in + cross-environment scenarios. + +#### Inherited from + +[`DataSourceNew`](DataSourceNew.md).[`physicalName`](DataSourceNew.md#physicalname) + +*** + +### stack + +> `readonly` **stack**: `Stack` + +The stack in which this resource is defined. 
+ +#### Inherited from + +[`DataSourceNew`](DataSourceNew.md).[`stack`](DataSourceNew.md#stack) + +## Methods + +### \_enableCrossEnvironment() + +> **\_enableCrossEnvironment**(): `void` + +**`Internal`** + +Called when this resource is referenced across environments +(account/region) in order to request that a physical name will be generated +for this resource during synthesis, so the resource can be referenced +through its absolute name/arn. + +#### Returns + +`void` + +#### Inherited from + +[`DataSourceNew`](DataSourceNew.md).[`_enableCrossEnvironment`](DataSourceNew.md#_enablecrossenvironment) + +*** + +### applyRemovalPolicy() + +> **applyRemovalPolicy**(`policy`): `void` + +Apply the given removal policy to this resource + +The Removal Policy controls what happens to this resource when it stops +being managed by CloudFormation, either because you've removed it from the +CDK application or because you've made a change that requires the resource +to be replaced. + +The resource can be deleted (`RemovalPolicy.DESTROY`), or left in your AWS +account for data recovery and cleanup later (`RemovalPolicy.RETAIN`). + +#### Parameters + +• **policy**: `RemovalPolicy` + +#### Returns + +`void` + +#### Inherited from + +[`DataSourceNew`](DataSourceNew.md).[`applyRemovalPolicy`](DataSourceNew.md#applyremovalpolicy) + +*** + +### formatAsCfnProps() + +> **formatAsCfnProps**(`props`, `dataSourceConfiguration`): `CfnDataSourceProps` + +Formats the data source configuration properties for CloudFormation.
+ +#### Parameters + +• **props**: [`DataSourceAssociationProps`](../interfaces/DataSourceAssociationProps.md) + +• **dataSourceConfiguration**: `DataSourceConfigurationProperty` + +#### Returns + +`CfnDataSourceProps` + +#### Inherited from + +[`DataSourceNew`](DataSourceNew.md).[`formatAsCfnProps`](DataSourceNew.md#formatascfnprops) + +*** + +### generatePhysicalName() + +> `protected` **generatePhysicalName**(): `string` + +#### Returns + +`string` + +#### Inherited from + +[`DataSourceNew`](DataSourceNew.md).[`generatePhysicalName`](DataSourceNew.md#generatephysicalname) + +*** + +### getResourceArnAttribute() + +> `protected` **getResourceArnAttribute**(`arnAttr`, `arnComponents`): `string` + +Returns an environment-sensitive token that should be used for the +resource's "ARN" attribute (e.g. `bucket.bucketArn`). + +Normally, this token will resolve to `arnAttr`, but if the resource is +referenced across environments, `arnComponents` will be used to synthesize +a concrete ARN with the resource's physical name. Make sure to reference +`this.physicalName` in `arnComponents`. + +#### Parameters + +• **arnAttr**: `string` + +The CFN attribute which resolves to the ARN of the resource. +Commonly it will be called "Arn" (e.g. `resource.attrArn`), but sometimes +it's the CFN resource's `ref`. + +• **arnComponents**: `ArnComponents` + +The format of the ARN of this resource. You must +reference `this.physicalName` somewhere within the ARN in order for +cross-environment references to work. + +#### Returns + +`string` + +#### Inherited from + +[`DataSourceNew`](DataSourceNew.md).[`getResourceArnAttribute`](DataSourceNew.md#getresourcearnattribute) + +*** + +### getResourceNameAttribute() + +> `protected` **getResourceNameAttribute**(`nameAttr`): `string` + +Returns an environment-sensitive token that should be used for the +resource's "name" attribute (e.g. `bucket.bucketName`). 
+ +Normally, this token will resolve to `nameAttr`, but if the resource is +referenced across environments, it will be resolved to `this.physicalName`, +which will be a concrete name. + +#### Parameters + +• **nameAttr**: `string` + +The CFN attribute which resolves to the resource's name. +Commonly this is the resource's `ref`. + +#### Returns + +`string` + +#### Inherited from + +[`DataSourceNew`](DataSourceNew.md).[`getResourceNameAttribute`](DataSourceNew.md#getresourcenameattribute) + +*** + +### handleCommonPermissions() + +> **handleCommonPermissions**(`props`): `void` + +Adds appropriate permissions to the KB execution role needed by the data source. + +#### Parameters + +• **props**: [`DataSourceAssociationProps`](../interfaces/DataSourceAssociationProps.md) + +#### Returns + +`void` + +#### Inherited from + +[`DataSourceNew`](DataSourceNew.md).[`handleCommonPermissions`](DataSourceNew.md#handlecommonpermissions) + +*** + +### toString() + +> **toString**(): `string` + +Returns a string representation of this construct. + +#### Returns + +`string` + +#### Inherited from + +[`DataSourceNew`](DataSourceNew.md).[`toString`](DataSourceNew.md#tostring) + +*** + +### isConstruct() + +> `static` **isConstruct**(`x`): `x is Construct` + +Checks if `x` is a construct. + +Use this method instead of `instanceof` to properly detect `Construct` +instances, even when the construct library is symlinked. + +Explanation: in JavaScript, multiple copies of the `constructs` library on +disk are seen as independent, completely different libraries. As a +consequence, the class `Construct` in each copy of the `constructs` library +is seen as a different class, and an instance of one class will not test as +`instanceof` the other class. 
`npm install` will not create installations +like this, but users may manually symlink construct libraries together or +use a monorepo tool: in those cases, multiple copies of the `constructs` +library can be accidentally installed, and `instanceof` will behave +unpredictably. It is safest to avoid using `instanceof`, and using +this type-testing method instead. + +#### Parameters + +• **x**: `any` + +Any object + +#### Returns + +`x is Construct` + +true if `x` is an object created from a class which extends `Construct`. + +#### Inherited from + +[`DataSourceNew`](DataSourceNew.md).[`isConstruct`](DataSourceNew.md#isconstruct) + +*** + +### isOwnedResource() + +> `static` **isOwnedResource**(`construct`): `boolean` + +Returns true if the construct was created by CDK, and false otherwise + +#### Parameters + +• **construct**: `IConstruct` + +#### Returns + +`boolean` + +#### Inherited from + +[`DataSourceNew`](DataSourceNew.md).[`isOwnedResource`](DataSourceNew.md#isownedresource) + +*** + +### isResource() + +> `static` **isResource**(`construct`): `construct is Resource` + +Check whether the given construct is a Resource + +#### Parameters + +• **construct**: `IConstruct` + +#### Returns + +`construct is Resource` + +#### Inherited from + +[`DataSourceNew`](DataSourceNew.md).[`isResource`](DataSourceNew.md#isresource) diff --git a/apidocs/namespaces/bedrock/classes/SharepointDataSource.md b/apidocs/namespaces/bedrock/classes/SharepointDataSource.md new file mode 100644 index 00000000..c7e5b4ff --- /dev/null +++ b/apidocs/namespaces/bedrock/classes/SharepointDataSource.md @@ -0,0 +1,449 @@ +[**@cdklabs/generative-ai-cdk-constructs**](../../../README.md) • **Docs** + +*** + +[@cdklabs/generative-ai-cdk-constructs](../../../README.md) / [bedrock](../README.md) / SharePointDataSource + +# Class: SharePointDataSource + +Sets up a SharePoint data source to be added to a knowledge base.
+ +## Extends + +- [`DataSourceNew`](DataSourceNew.md) + +## Constructors + +### new SharePointDataSource() + +> **new SharePointDataSource**(`scope`, `id`, `props`): [`SharePointDataSource`](SharepointDataSource.md) + +#### Parameters + +• **scope**: `Construct` + +• **id**: `string` + +• **props**: [`SharePointDataSourceProps`](../interfaces/SharepointDataSourceProps.md) + +#### Returns + +[`SharePointDataSource`](SharepointDataSource.md) + +#### Overrides + +[`DataSourceNew`](DataSourceNew.md).[`constructor`](DataSourceNew.md#constructors) + +## Properties + +### authSecret + +> `readonly` **authSecret**: `ISecret` + +The AWS Secrets Manager secret that stores your authentication credentials. + +*** + +### dataSourceId + +> `readonly` **dataSourceId**: `string` + +The unique identifier of the data source. + +#### Example + +```ts +'JHUEVXUZMU' +``` + +#### Overrides + +[`DataSourceNew`](DataSourceNew.md).[`dataSourceId`](DataSourceNew.md#datasourceid) + +*** + +### dataSourceName + +> `readonly` **dataSourceName**: `string` + +The name of the data source. + +#### Overrides + +[`DataSourceNew`](DataSourceNew.md).[`dataSourceName`](DataSourceNew.md#datasourcename) + +*** + +### dataSourceType + +> `readonly` **dataSourceType**: [`DataSourceType`](../enumerations/DataSourceType.md) + +The type of data source. + +#### Overrides + +[`DataSourceNew`](DataSourceNew.md).[`dataSourceType`](DataSourceNew.md#datasourcetype) + +*** + +### domain + +> `readonly` **domain**: `string` + +The domain name of your SharePoint instance. + +*** + +### env + +> `readonly` **env**: `ResourceEnvironment` + +The environment this resource belongs to.
+For resources that are created and managed by the CDK +(generally, those created by creating new class instances like Role, Bucket, etc.), +this is always the same as the environment of the stack they belong to; +however, for imported resources +(those obtained from static methods like fromRoleArn, fromBucketName, etc.), +that might be different than the stack they were imported into. + +#### Inherited from + +[`DataSourceNew`](DataSourceNew.md).[`env`](DataSourceNew.md#env) + +*** + +### kmsKey? + +> `readonly` `optional` **kmsKey**: `IKey` + +The KMS key to use to encrypt the data source. + +#### Overrides + +[`DataSourceNew`](DataSourceNew.md).[`kmsKey`](DataSourceNew.md#kmskey) + +*** + +### knowledgeBase + +> `readonly` **knowledgeBase**: [`IKnowledgeBase`](../interfaces/IKnowledgeBase.md) + +The knowledge base associated with the data source. + +#### Overrides + +[`DataSourceNew`](DataSourceNew.md).[`knowledgeBase`](DataSourceNew.md#knowledgebase) + +*** + +### node + +> `readonly` **node**: `Node` + +The tree node. + +#### Inherited from + +[`DataSourceNew`](DataSourceNew.md).[`node`](DataSourceNew.md#node) + +*** + +### physicalName + +> `protected` `readonly` **physicalName**: `string` + +Returns a string-encoded token that resolves to the physical name that +should be passed to the CloudFormation resource. + +This value will resolve to one of the following: +- a concrete value (e.g. `"my-awesome-bucket"`) +- `undefined`, when a name should be generated by CloudFormation +- a concrete name generated automatically during synthesis, in + cross-environment scenarios. + +#### Inherited from + +[`DataSourceNew`](DataSourceNew.md).[`physicalName`](DataSourceNew.md#physicalname) + +*** + +### siteUrls + +> `readonly` **siteUrls**: `string`[] + +The SharePoint site URL/URLs. + +*** + +### stack + +> `readonly` **stack**: `Stack` + +The stack in which this resource is defined. 
+ +#### Inherited from + +[`DataSourceNew`](DataSourceNew.md).[`stack`](DataSourceNew.md#stack) + +## Methods + +### \_enableCrossEnvironment() + +> **\_enableCrossEnvironment**(): `void` + +**`Internal`** + +Called when this resource is referenced across environments +(account/region) in order to request that a physical name will be generated +for this resource during synthesis, so the resource can be referenced +through its absolute name/arn. + +#### Returns + +`void` + +#### Inherited from + +[`DataSourceNew`](DataSourceNew.md).[`_enableCrossEnvironment`](DataSourceNew.md#_enablecrossenvironment) + +*** + +### applyRemovalPolicy() + +> **applyRemovalPolicy**(`policy`): `void` + +Apply the given removal policy to this resource + +The Removal Policy controls what happens to this resource when it stops +being managed by CloudFormation, either because you've removed it from the +CDK application or because you've made a change that requires the resource +to be replaced. + +The resource can be deleted (`RemovalPolicy.DESTROY`), or left in your AWS +account for data recovery and cleanup later (`RemovalPolicy.RETAIN`). + +#### Parameters + +• **policy**: `RemovalPolicy` + +#### Returns + +`void` + +#### Inherited from + +[`DataSourceNew`](DataSourceNew.md).[`applyRemovalPolicy`](DataSourceNew.md#applyremovalpolicy) + +*** + +### formatAsCfnProps() + +> **formatAsCfnProps**(`props`, `dataSourceConfiguration`): `CfnDataSourceProps` + +Formats the data source configuration properties for CloudFormation.
+ +#### Parameters + +• **props**: [`DataSourceAssociationProps`](../interfaces/DataSourceAssociationProps.md) + +• **dataSourceConfiguration**: `DataSourceConfigurationProperty` + +#### Returns + +`CfnDataSourceProps` + +#### Inherited from + +[`DataSourceNew`](DataSourceNew.md).[`formatAsCfnProps`](DataSourceNew.md#formatascfnprops) + +*** + +### generatePhysicalName() + +> `protected` **generatePhysicalName**(): `string` + +#### Returns + +`string` + +#### Inherited from + +[`DataSourceNew`](DataSourceNew.md).[`generatePhysicalName`](DataSourceNew.md#generatephysicalname) + +*** + +### getResourceArnAttribute() + +> `protected` **getResourceArnAttribute**(`arnAttr`, `arnComponents`): `string` + +Returns an environment-sensitive token that should be used for the +resource's "ARN" attribute (e.g. `bucket.bucketArn`). + +Normally, this token will resolve to `arnAttr`, but if the resource is +referenced across environments, `arnComponents` will be used to synthesize +a concrete ARN with the resource's physical name. Make sure to reference +`this.physicalName` in `arnComponents`. + +#### Parameters + +• **arnAttr**: `string` + +The CFN attribute which resolves to the ARN of the resource. +Commonly it will be called "Arn" (e.g. `resource.attrArn`), but sometimes +it's the CFN resource's `ref`. + +• **arnComponents**: `ArnComponents` + +The format of the ARN of this resource. You must +reference `this.physicalName` somewhere within the ARN in order for +cross-environment references to work. + +#### Returns + +`string` + +#### Inherited from + +[`DataSourceNew`](DataSourceNew.md).[`getResourceArnAttribute`](DataSourceNew.md#getresourcearnattribute) + +*** + +### getResourceNameAttribute() + +> `protected` **getResourceNameAttribute**(`nameAttr`): `string` + +Returns an environment-sensitive token that should be used for the +resource's "name" attribute (e.g. `bucket.bucketName`). 
+ +Normally, this token will resolve to `nameAttr`, but if the resource is +referenced across environments, it will be resolved to `this.physicalName`, +which will be a concrete name. + +#### Parameters + +• **nameAttr**: `string` + +The CFN attribute which resolves to the resource's name. +Commonly this is the resource's `ref`. + +#### Returns + +`string` + +#### Inherited from + +[`DataSourceNew`](DataSourceNew.md).[`getResourceNameAttribute`](DataSourceNew.md#getresourcenameattribute) + +*** + +### handleCommonPermissions() + +> **handleCommonPermissions**(`props`): `void` + +Adds appropriate permissions to the KB execution role needed by the data source. + +#### Parameters + +• **props**: [`DataSourceAssociationProps`](../interfaces/DataSourceAssociationProps.md) + +#### Returns + +`void` + +#### Inherited from + +[`DataSourceNew`](DataSourceNew.md).[`handleCommonPermissions`](DataSourceNew.md#handlecommonpermissions) + +*** + +### toString() + +> **toString**(): `string` + +Returns a string representation of this construct. + +#### Returns + +`string` + +#### Inherited from + +[`DataSourceNew`](DataSourceNew.md).[`toString`](DataSourceNew.md#tostring) + +*** + +### isConstruct() + +> `static` **isConstruct**(`x`): `x is Construct` + +Checks if `x` is a construct. + +Use this method instead of `instanceof` to properly detect `Construct` +instances, even when the construct library is symlinked. + +Explanation: in JavaScript, multiple copies of the `constructs` library on +disk are seen as independent, completely different libraries. As a +consequence, the class `Construct` in each copy of the `constructs` library +is seen as a different class, and an instance of one class will not test as +`instanceof` the other class. 
`npm install` will not create installations +like this, but users may manually symlink construct libraries together or +use a monorepo tool: in those cases, multiple copies of the `constructs` +library can be accidentally installed, and `instanceof` will behave +unpredictably. It is safest to avoid using `instanceof`, and using +this type-testing method instead. + +#### Parameters + +• **x**: `any` + +Any object + +#### Returns + +`x is Construct` + +true if `x` is an object created from a class which extends `Construct`. + +#### Inherited from + +[`DataSourceNew`](DataSourceNew.md).[`isConstruct`](DataSourceNew.md#isconstruct) + +*** + +### isOwnedResource() + +> `static` **isOwnedResource**(`construct`): `boolean` + +Returns true if the construct was created by CDK, and false otherwise + +#### Parameters + +• **construct**: `IConstruct` + +#### Returns + +`boolean` + +#### Inherited from + +[`DataSourceNew`](DataSourceNew.md).[`isOwnedResource`](DataSourceNew.md#isownedresource) + +*** + +### isResource() + +> `static` **isResource**(`construct`): `construct is Resource` + +Check whether the given construct is a Resource + +#### Parameters + +• **construct**: `IConstruct` + +#### Returns + +`construct is Resource` + +#### Inherited from + +[`DataSourceNew`](DataSourceNew.md).[`isResource`](DataSourceNew.md#isresource) diff --git a/apidocs/namespaces/bedrock/classes/WebCrawlerDataSource.md b/apidocs/namespaces/bedrock/classes/WebCrawlerDataSource.md new file mode 100644 index 00000000..66f06f38 --- /dev/null +++ b/apidocs/namespaces/bedrock/classes/WebCrawlerDataSource.md @@ -0,0 +1,441 @@ +[**@cdklabs/generative-ai-cdk-constructs**](../../../README.md) • **Docs** + +*** + +[@cdklabs/generative-ai-cdk-constructs](../../../README.md) / [bedrock](../README.md) / WebCrawlerDataSource + +# Class: WebCrawlerDataSource + +Sets up a web crawler data source to be added to a knowledge base. 
+ +## Extends + +- [`DataSourceNew`](DataSourceNew.md) + +## Constructors + +### new WebCrawlerDataSource() + +> **new WebCrawlerDataSource**(`scope`, `id`, `props`): [`WebCrawlerDataSource`](WebCrawlerDataSource.md) + +#### Parameters + +• **scope**: `Construct` + +• **id**: `string` + +• **props**: [`WebCrawlerDataSourceProps`](../interfaces/WebCrawlerDataSourceProps.md) + +#### Returns + +[`WebCrawlerDataSource`](WebCrawlerDataSource.md) + +#### Overrides + +[`DataSourceNew`](DataSourceNew.md).[`constructor`](DataSourceNew.md#constructors) + +## Properties + +### crawlingRate + +> `readonly` **crawlingRate**: `number` + +The max rate at which pages are crawled. + +*** + +### dataSourceId + +> `readonly` **dataSourceId**: `string` + +The unique identifier of the data source. + +#### Example + +```ts +'JHUEVXUZMU' +``` + +#### Overrides + +[`DataSourceNew`](DataSourceNew.md).[`dataSourceId`](DataSourceNew.md#datasourceid) + +*** + +### dataSourceName + +> `readonly` **dataSourceName**: `string` + +The name of the data source. + +#### Overrides + +[`DataSourceNew`](DataSourceNew.md).[`dataSourceName`](DataSourceNew.md#datasourcename) + +*** + +### dataSourceType + +> `readonly` **dataSourceType**: [`DataSourceType`](../enumerations/DataSourceType.md) + +The type of data source. + +#### Overrides + +[`DataSourceNew`](DataSourceNew.md).[`dataSourceType`](DataSourceNew.md#datasourcetype) + +*** + +### env + +> `readonly` **env**: `ResourceEnvironment` + +The environment this resource belongs to. +For resources that are created and managed by the CDK +(generally, those created by creating new class instances like Role, Bucket, etc.), +this is always the same as the environment of the stack they belong to; +however, for imported resources +(those obtained from static methods like fromRoleArn, fromBucketName, etc.), +that might be different than the stack they were imported into. 
+ +#### Inherited from + +[`DataSourceNew`](DataSourceNew.md).[`env`](DataSourceNew.md#env) + +*** + +### kmsKey? + +> `readonly` `optional` **kmsKey**: `IKey` + +The KMS key to use to encrypt the data source. + +#### Overrides + +[`DataSourceNew`](DataSourceNew.md).[`kmsKey`](DataSourceNew.md#kmskey) + +*** + +### knowledgeBase + +> `readonly` **knowledgeBase**: [`IKnowledgeBase`](../interfaces/IKnowledgeBase.md) + +The knowledge base associated with the data source. + +#### Overrides + +[`DataSourceNew`](DataSourceNew.md).[`knowledgeBase`](DataSourceNew.md#knowledgebase) + +*** + +### node + +> `readonly` **node**: `Node` + +The tree node. + +#### Inherited from + +[`DataSourceNew`](DataSourceNew.md).[`node`](DataSourceNew.md#node) + +*** + +### physicalName + +> `protected` `readonly` **physicalName**: `string` + +Returns a string-encoded token that resolves to the physical name that +should be passed to the CloudFormation resource. + +This value will resolve to one of the following: +- a concrete value (e.g. `"my-awesome-bucket"`) +- `undefined`, when a name should be generated by CloudFormation +- a concrete name generated automatically during synthesis, in + cross-environment scenarios. + +#### Inherited from + +[`DataSourceNew`](DataSourceNew.md).[`physicalName`](DataSourceNew.md#physicalname) + +*** + +### siteUrls + +> `readonly` **siteUrls**: `string`[] + +The URLs of the websites to crawl. + +*** + +### stack + +> `readonly` **stack**: `Stack` + +The stack in which this resource is defined. + +#### Inherited from + +[`DataSourceNew`](DataSourceNew.md).[`stack`](DataSourceNew.md#stack) + +## Methods + +### \_enableCrossEnvironment() + +> **\_enableCrossEnvironment**(): `void` + +**`Internal`** + +Called when this resource is referenced across environments +(account/region) in order to request that a physical name will be generated +for this resource during synthesis, so the resource can be referenced +through its absolute name/arn. 
+ +#### Returns + +`void` + +#### Inherited from + +[`DataSourceNew`](DataSourceNew.md).[`_enableCrossEnvironment`](DataSourceNew.md#_enablecrossenvironment) + +*** + +### applyRemovalPolicy() + +> **applyRemovalPolicy**(`policy`): `void` + +Apply the given removal policy to this resource + +The Removal Policy controls what happens to this resource when it stops +being managed by CloudFormation, either because you've removed it from the +CDK application or because you've made a change that requires the resource +to be replaced. + +The resource can be deleted (`RemovalPolicy.DESTROY`), or left in your AWS +account for data recovery and cleanup later (`RemovalPolicy.RETAIN`). + +#### Parameters + +• **policy**: `RemovalPolicy` + +#### Returns + +`void` + +#### Inherited from + +[`DataSourceNew`](DataSourceNew.md).[`applyRemovalPolicy`](DataSourceNew.md#applyremovalpolicy) + +*** + +### formatAsCfnProps() + +> **formatAsCfnProps**(`props`, `dataSourceConfiguration`): `CfnDataSourceProps` + +Formats the data source configuration properties for CloudFormation. + +#### Parameters + +• **props**: [`DataSourceAssociationProps`](../interfaces/DataSourceAssociationProps.md) + +• **dataSourceConfiguration**: `DataSourceConfigurationProperty` + +#### Returns + +`CfnDataSourceProps` + +#### Inherited from + +[`DataSourceNew`](DataSourceNew.md).[`formatAsCfnProps`](DataSourceNew.md#formatascfnprops) + +*** + +### generatePhysicalName() + +> `protected` **generatePhysicalName**(): `string` + +#### Returns + +`string` + +#### Inherited from + +[`DataSourceNew`](DataSourceNew.md).[`generatePhysicalName`](DataSourceNew.md#generatephysicalname) + +*** + +### getResourceArnAttribute() + +> `protected` **getResourceArnAttribute**(`arnAttr`, `arnComponents`): `string` + +Returns an environment-sensitive token that should be used for the +resource's "ARN" attribute (e.g. `bucket.bucketArn`). 
+ +Normally, this token will resolve to `arnAttr`, but if the resource is +referenced across environments, `arnComponents` will be used to synthesize +a concrete ARN with the resource's physical name. Make sure to reference +`this.physicalName` in `arnComponents`. + +#### Parameters + +• **arnAttr**: `string` + +The CFN attribute which resolves to the ARN of the resource. +Commonly it will be called "Arn" (e.g. `resource.attrArn`), but sometimes +it's the CFN resource's `ref`. + +• **arnComponents**: `ArnComponents` + +The format of the ARN of this resource. You must +reference `this.physicalName` somewhere within the ARN in order for +cross-environment references to work. + +#### Returns + +`string` + +#### Inherited from + +[`DataSourceNew`](DataSourceNew.md).[`getResourceArnAttribute`](DataSourceNew.md#getresourcearnattribute) + +*** + +### getResourceNameAttribute() + +> `protected` **getResourceNameAttribute**(`nameAttr`): `string` + +Returns an environment-sensitive token that should be used for the +resource's "name" attribute (e.g. `bucket.bucketName`). + +Normally, this token will resolve to `nameAttr`, but if the resource is +referenced across environments, it will be resolved to `this.physicalName`, +which will be a concrete name. + +#### Parameters + +• **nameAttr**: `string` + +The CFN attribute which resolves to the resource's name. +Commonly this is the resource's `ref`. + +#### Returns + +`string` + +#### Inherited from + +[`DataSourceNew`](DataSourceNew.md).[`getResourceNameAttribute`](DataSourceNew.md#getresourcenameattribute) + +*** + +### handleCommonPermissions() + +> **handleCommonPermissions**(`props`): `void` + +Adds appropriate permissions to the KB execution role needed by the data source. 
+ +#### Parameters + +• **props**: [`DataSourceAssociationProps`](../interfaces/DataSourceAssociationProps.md) + +#### Returns + +`void` + +#### Inherited from + +[`DataSourceNew`](DataSourceNew.md).[`handleCommonPermissions`](DataSourceNew.md#handlecommonpermissions) + +*** + +### toString() + +> **toString**(): `string` + +Returns a string representation of this construct. + +#### Returns + +`string` + +#### Inherited from + +[`DataSourceNew`](DataSourceNew.md).[`toString`](DataSourceNew.md#tostring) + +*** + +### isConstruct() + +> `static` **isConstruct**(`x`): `x is Construct` + +Checks if `x` is a construct. + +Use this method instead of `instanceof` to properly detect `Construct` +instances, even when the construct library is symlinked. + +Explanation: in JavaScript, multiple copies of the `constructs` library on +disk are seen as independent, completely different libraries. As a +consequence, the class `Construct` in each copy of the `constructs` library +is seen as a different class, and an instance of one class will not test as +`instanceof` the other class. `npm install` will not create installations +like this, but users may manually symlink construct libraries together or +use a monorepo tool: in those cases, multiple copies of the `constructs` +library can be accidentally installed, and `instanceof` will behave +unpredictably. It is safest to avoid using `instanceof`, and using +this type-testing method instead. + +#### Parameters + +• **x**: `any` + +Any object + +#### Returns + +`x is Construct` + +true if `x` is an object created from a class which extends `Construct`. 
+ +#### Inherited from + +[`DataSourceNew`](DataSourceNew.md).[`isConstruct`](DataSourceNew.md#isconstruct) + +*** + +### isOwnedResource() + +> `static` **isOwnedResource**(`construct`): `boolean` + +Returns true if the construct was created by CDK, and false otherwise + +#### Parameters + +• **construct**: `IConstruct` + +#### Returns + +`boolean` + +#### Inherited from + +[`DataSourceNew`](DataSourceNew.md).[`isOwnedResource`](DataSourceNew.md#isownedresource) + +*** + +### isResource() + +> `static` **isResource**(`construct`): `construct is Resource` + +Check whether the given construct is a Resource + +#### Parameters + +• **construct**: `IConstruct` + +#### Returns + +`construct is Resource` + +#### Inherited from + +[`DataSourceNew`](DataSourceNew.md).[`isResource`](DataSourceNew.md#isresource) diff --git a/apidocs/namespaces/bedrock/enumerations/ChunkingStrategy.md b/apidocs/namespaces/bedrock/enumerations/ChunkingStrategy.md deleted file mode 100644 index 3be55bea..00000000 --- a/apidocs/namespaces/bedrock/enumerations/ChunkingStrategy.md +++ /dev/null @@ -1,42 +0,0 @@ -[**@cdklabs/generative-ai-cdk-constructs**](../../../README.md) • **Docs** - -*** - -[@cdklabs/generative-ai-cdk-constructs](../../../README.md) / [bedrock](../README.md) / ChunkingStrategy - -# Enumeration: ChunkingStrategy - -Knowledge base can split your source data into chunks. A chunk refers to an -excerpt from a data source that is returned when the knowledge base that it -belongs to is queried. You have the following options for chunking your -data. If you opt for NONE, then you may want to pre-process your files by -splitting them up such that each file corresponds to a chunk. - -## Enumeration Members - -### DEFAULT - -> **DEFAULT**: `"DEFAULT"` - -`FIXED_SIZE` with the default chunk size of 300 tokens and 20% overlap. -If default is selected, chunk size and overlap set by the user will be -ignored. 
- -*** - -### FIXED\_SIZE - -> **FIXED\_SIZE**: `"FIXED_SIZE"` - -Amazon Bedrock splits your source data into chunks of the approximate size -that you set in the `fixedSizeChunkingConfiguration`. - -*** - -### NONE - -> **NONE**: `"NONE"` - -Amazon Bedrock treats each file as one chunk. If you choose this option, -you may want to pre-process your documents by splitting them into separate -files. diff --git a/apidocs/namespaces/bedrock/enumerations/ConfluenceDataSourceAuthType.md b/apidocs/namespaces/bedrock/enumerations/ConfluenceDataSourceAuthType.md new file mode 100644 index 00000000..9750ad80 --- /dev/null +++ b/apidocs/namespaces/bedrock/enumerations/ConfluenceDataSourceAuthType.md @@ -0,0 +1,35 @@ +[**@cdklabs/generative-ai-cdk-constructs**](../../../README.md) • **Docs** + +*** + +[@cdklabs/generative-ai-cdk-constructs](../../../README.md) / [bedrock](../README.md) / ConfluenceDataSourceAuthType + +# Enumeration: ConfluenceDataSourceAuthType + +The different authentication types available to connect to your Confluence instance. 
+ +## See + +https://docs.aws.amazon.com/bedrock/latest/userguide/confluence-data-source-connector.html#configuration-confluence-connector + +## Enumeration Members + +### BASIC + +> **BASIC**: `"BASIC"` + +Your secret authentication credentials in AWS Secrets Manager should include: + - `username` (email of admin account) + - `password` (API token) + +*** + +### OAUTH2\_CLIENT\_CREDENTIALS + +> **OAUTH2\_CLIENT\_CREDENTIALS**: `"OAUTH2_CLIENT_CREDENTIALS"` + +Your secret authentication credentials in AWS Secrets Manager should include: +- `confluenceAppKey` +- `confluenceAppSecret` +- `confluenceAccessToken` +- `confluenceRefreshToken` diff --git a/apidocs/namespaces/bedrock/enumerations/ConfluenceObjectType.md b/apidocs/namespaces/bedrock/enumerations/ConfluenceObjectType.md new file mode 100644 index 00000000..0e332e8c --- /dev/null +++ b/apidocs/namespaces/bedrock/enumerations/ConfluenceObjectType.md @@ -0,0 +1,40 @@ +[**@cdklabs/generative-ai-cdk-constructs**](../../../README.md) • **Docs** + +*** + +[@cdklabs/generative-ai-cdk-constructs](../../../README.md) / [bedrock](../README.md) / ConfluenceObjectType + +# Enumeration: ConfluenceObjectType + +Represents the different types of content objects in Confluence that can be +crawled by the data source. 
+ +## Enumeration Members + +### ATTACHMENT + +> **ATTACHMENT**: `"Attachment"` + +*** + +### BLOG + +> **BLOG**: `"Blog"` + +*** + +### COMMENT + +> **COMMENT**: `"Comment"` + +*** + +### PAGE + +> **PAGE**: `"Page"` + +*** + +### SPACE + +> **SPACE**: `"Space"` diff --git a/apidocs/namespaces/bedrock/enumerations/CrawlingScope.md b/apidocs/namespaces/bedrock/enumerations/CrawlingScope.md new file mode 100644 index 00000000..47ce3b5d --- /dev/null +++ b/apidocs/namespaces/bedrock/enumerations/CrawlingScope.md @@ -0,0 +1,36 @@ +[**@cdklabs/generative-ai-cdk-constructs**](../../../README.md) • **Docs** + +*** + +[@cdklabs/generative-ai-cdk-constructs](../../../README.md) / [bedrock](../README.md) / CrawlingScope + +# Enumeration: CrawlingScope + +The scope of the crawling. + +## Enumeration Members + +### DEFAULT + +> **DEFAULT**: `"DEFAULT"` + +Limit crawling to web pages that belong to the same host and with the +same initial URL path. + +*** + +### HOST\_ONLY + +> **HOST\_ONLY**: `"HOST_ONLY"` + +Crawls only web pages that belong to the same host or primary domain. + +*** + +### SUBDOMAINS + +> **SUBDOMAINS**: `"SUBDOMAINS"` + +Includes subdomains in addition to the host or primary domain, i.e. +web pages that contain "aws.amazon.com" can also include +sub domain "docs.aws.amazon.com" diff --git a/apidocs/namespaces/bedrock/enumerations/DataDeletionPolicy.md b/apidocs/namespaces/bedrock/enumerations/DataDeletionPolicy.md new file mode 100644 index 00000000..8576cf71 --- /dev/null +++ b/apidocs/namespaces/bedrock/enumerations/DataDeletionPolicy.md @@ -0,0 +1,28 @@ +[**@cdklabs/generative-ai-cdk-constructs**](../../../README.md) • **Docs** + +*** + +[@cdklabs/generative-ai-cdk-constructs](../../../README.md) / [bedrock](../README.md) / DataDeletionPolicy + +# Enumeration: DataDeletionPolicy + +Specifies the policy for handling data when a data source resource is deleted. +This policy affects the vector embeddings created from the data source. 
+ +## Enumeration Members + +### DELETE + +> **DELETE**: `"DELETE"` + +Deletes all vector embeddings derived from the data source upon deletion +of a data source resource. + +*** + +### RETAIN + +> **RETAIN**: `"RETAIN"` + +Retains all vector embeddings derived from the data source even after +deletion of a data source resource. diff --git a/apidocs/namespaces/bedrock/enumerations/DataSourceType.md b/apidocs/namespaces/bedrock/enumerations/DataSourceType.md new file mode 100644 index 00000000..78efebc6 --- /dev/null +++ b/apidocs/namespaces/bedrock/enumerations/DataSourceType.md @@ -0,0 +1,50 @@ +[**@cdklabs/generative-ai-cdk-constructs**](../../../README.md) • **Docs** + +*** + +[@cdklabs/generative-ai-cdk-constructs](../../../README.md) / [bedrock](../README.md) / DataSourceType + +# Enumeration: DataSourceType + +Represents the types of data sources that can be associated with a Knowledge Base. + +## Enumeration Members + +### CONFLUENCE + +> **CONFLUENCE**: `"CONFLUENCE"` + +Confluence Cloud Instance data source. + +*** + +### S3 + +> **S3**: `"S3"` + +Amazon S3 Bucket data source. + +*** + +### SALESFORCE + +> **SALESFORCE**: `"SALESFORCE"` + +Salesforce instance data source. + +*** + +### SHAREPOINT + +> **SHAREPOINT**: `"SHAREPOINT"` + +Microsoft SharePoint instance data source. + +*** + +### WEB\_CRAWLER + +> **WEB\_CRAWLER**: `"WEB"` + +Web Crawler data source. +Extracts content from authorized public web pages using a crawler. 
diff --git a/apidocs/namespaces/bedrock/enumerations/SalesforceDataSourceAuthType.md b/apidocs/namespaces/bedrock/enumerations/SalesforceDataSourceAuthType.md new file mode 100644 index 00000000..83fd48e0 --- /dev/null +++ b/apidocs/namespaces/bedrock/enumerations/SalesforceDataSourceAuthType.md @@ -0,0 +1,20 @@ +[**@cdklabs/generative-ai-cdk-constructs**](../../../README.md) • **Docs** + +*** + +[@cdklabs/generative-ai-cdk-constructs](../../../README.md) / [bedrock](../README.md) / SalesforceDataSourceAuthType + +# Enumeration: SalesforceDataSourceAuthType + +Represents the authentication types available for connecting to a Salesforce data source. + +## Enumeration Members + +### OAUTH2\_CLIENT\_CREDENTIALS + +> **OAUTH2\_CLIENT\_CREDENTIALS**: `"OAUTH2_CLIENT_CREDENTIALS"` + +Your secret authentication credentials in AWS Secrets Manager should include: +- `consumerKey` (app client ID) +- `consumerSecret` (client secret) +- `authenticationUrl` diff --git a/apidocs/namespaces/bedrock/enumerations/SalesforceObjectType.md b/apidocs/namespaces/bedrock/enumerations/SalesforceObjectType.md new file mode 100644 index 00000000..a80aca72 --- /dev/null +++ b/apidocs/namespaces/bedrock/enumerations/SalesforceObjectType.md @@ -0,0 +1,135 @@ +[**@cdklabs/generative-ai-cdk-constructs**](../../../README.md) • **Docs** + +*** + +[@cdklabs/generative-ai-cdk-constructs](../../../README.md) / [bedrock](../README.md) / SalesforceObjectType + +# Enumeration: SalesforceObjectType + +Represents the Salesforce object types that can be accessed by the data source connector. 
+ +## Enumeration Members + +### ACCOUNT + +> **ACCOUNT**: `"Account"` + +*** + +### ATTACHMENT + +> **ATTACHMENT**: `"Attachment"` + +*** + +### CAMPAIGN + +> **CAMPAIGN**: `"Campaign"` + +*** + +### CASE + +> **CASE**: `"Case"` + +*** + +### COLLABORATION\_GROUP + +> **COLLABORATION\_GROUP**: `"CollaborationGroup"` + +*** + +### CONTACT + +> **CONTACT**: `"Contact"` + +*** + +### CONTENT\_VERSION + +> **CONTENT\_VERSION**: `"ContentVersion"` + +*** + +### CONTRACT + +> **CONTRACT**: `"Contract"` + +*** + +### DOCUMENT + +> **DOCUMENT**: `"Document"` + +*** + +### FEED\_COMMENT + +> **FEED\_COMMENT**: `"FeedComment"` + +*** + +### FEED\_ITEM + +> **FEED\_ITEM**: `"FeedItem"` + +*** + +### IDEA + +> **IDEA**: `"Idea"` + +*** + +### KNOWLEDGE\_KAV + +> **KNOWLEDGE\_KAV**: `"Knowledge__kav"` + +*** + +### LEAD + +> **LEAD**: `"Lead"` + +*** + +### OPPORTUNITY + +> **OPPORTUNITY**: `"Opportunity"` + +*** + +### PARTNER + +> **PARTNER**: `"Partner"` + +*** + +### PRICEBOOK\_2 + +> **PRICEBOOK\_2**: `"Pricebook2"` + +*** + +### PRODUCT\_2 + +> **PRODUCT\_2**: `"Product2"` + +*** + +### SOLUTION + +> **SOLUTION**: `"Solution"` + +*** + +### TASK + +> **TASK**: `"Task"` + +*** + +### USER + +> **USER**: `"User"` diff --git a/apidocs/namespaces/bedrock/enumerations/SharepointDataSourceAuthType.md b/apidocs/namespaces/bedrock/enumerations/SharepointDataSourceAuthType.md new file mode 100644 index 00000000..89264dca --- /dev/null +++ b/apidocs/namespaces/bedrock/enumerations/SharepointDataSourceAuthType.md @@ -0,0 +1,22 @@ +[**@cdklabs/generative-ai-cdk-constructs**](../../../README.md) • **Docs** + +*** + +[@cdklabs/generative-ai-cdk-constructs](../../../README.md) / [bedrock](../README.md) / SharePointDataSourceAuthType + +# Enumeration: SharePointDataSourceAuthType + +Represents the authentication types available for connecting to a SharePoint data source. 
+ +## Enumeration Members + +### OAUTH2\_CLIENT\_CREDENTIALS + +> **OAUTH2\_CLIENT\_CREDENTIALS**: `"OAUTH2_CLIENT_CREDENTIALS"` + +OAuth 2.0 Client Credentials flow for authentication with SharePoint. +Your secret authentication credentials in AWS Secrets Manager should include: +- `username`: The admin username for SharePoint authentication +- `password`: The admin password associated with the username +- `clientId`: The client ID (also known as application ID) +- `clientSecret`: The client secret diff --git a/apidocs/namespaces/bedrock/enumerations/SharepointObjectType.md b/apidocs/namespaces/bedrock/enumerations/SharepointObjectType.md new file mode 100644 index 00000000..a352df2d --- /dev/null +++ b/apidocs/namespaces/bedrock/enumerations/SharepointObjectType.md @@ -0,0 +1,33 @@ +[**@cdklabs/generative-ai-cdk-constructs**](../../../README.md) • **Docs** + +*** + +[@cdklabs/generative-ai-cdk-constructs](../../../README.md) / [bedrock](../README.md) / SharePointObjectType + +# Enumeration: SharePointObjectType + +Represents the SharePoint object types that can be accessed by the data source connector. + +## Enumeration Members + +### EVENT + +> **EVENT**: `"Event"` + +Represents a calendar event in SharePoint. + +*** + +### FILE + +> **FILE**: `"File"` + +Represents a file stored in SharePoint document libraries. + +*** + +### PAGE + +> **PAGE**: `"Page"` + +Represents a SharePoint page, which typically contains web parts and content. 
diff --git a/apidocs/namespaces/bedrock/enumerations/TransformationStep.md b/apidocs/namespaces/bedrock/enumerations/TransformationStep.md new file mode 100644 index 00000000..7fc77265 --- /dev/null +++ b/apidocs/namespaces/bedrock/enumerations/TransformationStep.md @@ -0,0 +1,18 @@ +[**@cdklabs/generative-ai-cdk-constructs**](../../../README.md) • **Docs** + +*** + +[@cdklabs/generative-ai-cdk-constructs](../../../README.md) / [bedrock](../README.md) / TransformationStep + +# Enumeration: TransformationStep + +Defines the step in the ingestion process where the custom transformation is applied. + +## Enumeration Members + +### POST\_CHUNKING + +> **POST\_CHUNKING**: `"POST_CHUNKING"` + +Processes documents after they have been converted into chunks. +This allows for custom chunk-level metadata addition or custom post-chunking logic. diff --git a/apidocs/namespaces/bedrock/interfaces/ConfluenceCrawlingFilters.md b/apidocs/namespaces/bedrock/interfaces/ConfluenceCrawlingFilters.md new file mode 100644 index 00000000..9d491bd8 --- /dev/null +++ b/apidocs/namespaces/bedrock/interfaces/ConfluenceCrawlingFilters.md @@ -0,0 +1,56 @@ +[**@cdklabs/generative-ai-cdk-constructs**](../../../README.md) • **Docs** + +*** + +[@cdklabs/generative-ai-cdk-constructs](../../../README.md) / [bedrock](../README.md) / ConfluenceCrawlingFilters + +# Interface: ConfluenceCrawlingFilters + +Defines filters for crawling Confluence content. +These filters allow you to include or exclude specific content based on object types and patterns. + +- For Spaces: Use the unique space key +- For Pages: Use the main page title +- For Blogs: Use the main blog title +- For Comments: Use "Re: Page/Blog Title" +- For Attachments: Use the filename with extension + +## Remarks + +- You can specify inclusion and exclusion patterns using regular expressions. +- If both inclusion and exclusion patterns match a document, the exclusion takes precedence. 
+ +## Example + +```ts +{ +  objectType: ConfluenceObjectType.ATTACHMENT, +  excludePatterns: [".*private.*\\.pdf"] +} +``` + +## Properties + +### excludePatterns? + +> `readonly` `optional` **excludePatterns**: `string`[] + +Regular expression patterns to exclude content. +Content matching these patterns will not be crawled, even if it matches an include pattern. + +*** + +### includePatterns? + +> `readonly` `optional` **includePatterns**: `string`[] + +Regular expression patterns to include content. +If specified, only content matching these patterns will be crawled. + +*** + +### objectType + +> `readonly` **objectType**: [`ConfluenceObjectType`](../enumerations/ConfluenceObjectType.md) + +The type of Confluence object to apply the filters to. diff --git a/apidocs/namespaces/bedrock/interfaces/ConfluenceDataSourceAssociationProps.md b/apidocs/namespaces/bedrock/interfaces/ConfluenceDataSourceAssociationProps.md new file mode 100644 index 00000000..9d8eab67 --- /dev/null +++ b/apidocs/namespaces/bedrock/interfaces/ConfluenceDataSourceAssociationProps.md @@ -0,0 +1,197 @@ +[**@cdklabs/generative-ai-cdk-constructs**](../../../README.md) • **Docs** + +*** + +[@cdklabs/generative-ai-cdk-constructs](../../../README.md) / [bedrock](../README.md) / ConfluenceDataSourceAssociationProps + +# Interface: ConfluenceDataSourceAssociationProps + +Interface to add a new data source to an existing KB. + +## Extends + +- [`DataSourceAssociationProps`](DataSourceAssociationProps.md) + +## Extended by + +- [`ConfluenceDataSourceProps`](ConfluenceDataSourceProps.md) + +## Properties + +### authSecret + +> `readonly` **authSecret**: `ISecret` + +The AWS Secrets Manager secret that stores your authentication credentials +for your Confluence instance URL. Secret must start with "AmazonBedrock-". + +*** + +### authType? 
+ +> `readonly` `optional` **authType**: [`ConfluenceDataSourceAuthType`](../enumerations/ConfluenceDataSourceAuthType.md) + +The supported authentication method to connect to the data source. + +#### Default + +```ts +ConfluenceDataSourceAuthType.OAUTH2_CLIENT_CREDENTIALS +``` + +*** + +### chunkingStrategy? + +> `readonly` `optional` **chunkingStrategy**: [`ChunkingStrategy`](../classes/ChunkingStrategy.md) + +The chunking strategy to use for splitting your documents or content. +The chunks are then converted to embeddings and written to the vector +index allowing for similarity search and retrieval of the content. + +#### Default + +```ts +ChunkingStrategy.DEFAULT +``` + +#### Inherited from + +[`DataSourceAssociationProps`](DataSourceAssociationProps.md).[`chunkingStrategy`](DataSourceAssociationProps.md#chunkingstrategy) + +*** + +### confluenceUrl + +> `readonly` **confluenceUrl**: `string` + +The Confluence host URL or instance URL. + +#### Example + +```ts +https://example.atlassian.net +``` + +*** + +### customTransformation? + +> `readonly` `optional` **customTransformation**: [`CustomTransformation`](../classes/CustomTransformation.md) + +The custom transformation strategy to use. + +#### Default + +```ts +- No custom transformation is used. +``` + +#### Inherited from + +[`DataSourceAssociationProps`](DataSourceAssociationProps.md).[`customTransformation`](DataSourceAssociationProps.md#customtransformation) + +*** + +### dataDeletionPolicy? + +> `readonly` `optional` **dataDeletionPolicy**: [`DataDeletionPolicy`](../enumerations/DataDeletionPolicy.md) + +The data deletion policy to apply to the data source. + +#### Default + +```ts +- Sets the data deletion policy to the default of the data source type. +``` + +#### Inherited from + +[`DataSourceAssociationProps`](DataSourceAssociationProps.md).[`dataDeletionPolicy`](DataSourceAssociationProps.md#datadeletionpolicy) + +*** + +### dataSourceName? 
+ +> `readonly` `optional` **dataSourceName**: `string` + +The name of the data source. + +#### Default + +```ts +- A new name will be generated. +``` + +#### Inherited from + +[`DataSourceAssociationProps`](DataSourceAssociationProps.md).[`dataSourceName`](DataSourceAssociationProps.md#datasourcename) + +*** + +### description? + +> `readonly` `optional` **description**: `string` + +A description of the data source. + +#### Default + +```ts +- No description is provided. +``` + +#### Inherited from + +[`DataSourceAssociationProps`](DataSourceAssociationProps.md).[`description`](DataSourceAssociationProps.md#description) + +*** + +### filters? + +> `readonly` `optional` **filters**: [`ConfluenceCrawlingFilters`](ConfluenceCrawlingFilters.md)[] + +The filters (regular expression patterns) for the crawling. +If there's a conflict, the exclude pattern takes precedence. + +#### Default + +```ts +None - all your content is crawled. +``` + +*** + +### kmsKey? + +> `readonly` `optional` **kmsKey**: `IKey` + +The KMS key to use to encrypt the data source. + +#### Default + +```ts +- Service owned and managed key. +``` + +#### Inherited from + +[`DataSourceAssociationProps`](DataSourceAssociationProps.md).[`kmsKey`](DataSourceAssociationProps.md#kmskey) + +*** + +### parsingStrategy? + +> `readonly` `optional` **parsingStrategy**: [`ParsingStategy`](../classes/ParsingStategy.md) + +The parsing strategy to use. + +#### Default + +```ts +- No parsing strategy is used. 
+``` + +#### Inherited from + +[`DataSourceAssociationProps`](DataSourceAssociationProps.md).[`parsingStrategy`](DataSourceAssociationProps.md#parsingstrategy) diff --git a/apidocs/namespaces/bedrock/interfaces/ConfluenceDataSourceProps.md b/apidocs/namespaces/bedrock/interfaces/ConfluenceDataSourceProps.md new file mode 100644 index 00000000..3a1c1b21 --- /dev/null +++ b/apidocs/namespaces/bedrock/interfaces/ConfluenceDataSourceProps.md @@ -0,0 +1,217 @@ +[**@cdklabs/generative-ai-cdk-constructs**](../../../README.md) • **Docs** + +*** + +[@cdklabs/generative-ai-cdk-constructs](../../../README.md) / [bedrock](../README.md) / ConfluenceDataSourceProps + +# Interface: ConfluenceDataSourceProps + +Interface to create a new standalone data source object. + +## Extends + +- [`ConfluenceDataSourceAssociationProps`](ConfluenceDataSourceAssociationProps.md) + +## Properties + +### authSecret + +> `readonly` **authSecret**: `ISecret` + +The AWS Secrets Manager secret that stores your authentication credentials +for your Confluence instance URL. Secret must start with "AmazonBedrock-". + +#### Inherited from + +[`ConfluenceDataSourceAssociationProps`](ConfluenceDataSourceAssociationProps.md).[`authSecret`](ConfluenceDataSourceAssociationProps.md#authsecret) + +*** + +### authType? + +> `readonly` `optional` **authType**: [`ConfluenceDataSourceAuthType`](../enumerations/ConfluenceDataSourceAuthType.md) + +The supported authentication method to connect to the data source. + +#### Default + +```ts +ConfluenceDataSourceAuthType.OAUTH2_CLIENT_CREDENTIALS +``` + +#### Inherited from + +[`ConfluenceDataSourceAssociationProps`](ConfluenceDataSourceAssociationProps.md).[`authType`](ConfluenceDataSourceAssociationProps.md#authtype) + +*** + +### chunkingStrategy? + +> `readonly` `optional` **chunkingStrategy**: [`ChunkingStrategy`](../classes/ChunkingStrategy.md) + +The chunking stategy to use for splitting your documents or content. 
+The chunks are then converted to embeddings and written to the vector +index allowing for similarity search and retrieval of the content. + +#### Default + +```ts +ChunkingStrategy.DEFAULT +``` + +#### Inherited from + +[`ConfluenceDataSourceAssociationProps`](ConfluenceDataSourceAssociationProps.md).[`chunkingStrategy`](ConfluenceDataSourceAssociationProps.md#chunkingstrategy) + +*** + +### confluenceUrl + +> `readonly` **confluenceUrl**: `string` + +The Confluence host URL or instance URL. + +#### Example + +```ts +https://example.atlassian.net +``` + +#### Inherited from + +[`ConfluenceDataSourceAssociationProps`](ConfluenceDataSourceAssociationProps.md).[`confluenceUrl`](ConfluenceDataSourceAssociationProps.md#confluenceurl) + +*** + +### customTransformation? + +> `readonly` `optional` **customTransformation**: [`CustomTransformation`](../classes/CustomTransformation.md) + +The custom transformation strategy to use. + +#### Default + +```ts +- No custom transformation is used. +``` + +#### Inherited from + +[`ConfluenceDataSourceAssociationProps`](ConfluenceDataSourceAssociationProps.md).[`customTransformation`](ConfluenceDataSourceAssociationProps.md#customtransformation) + +*** + +### dataDeletionPolicy? + +> `readonly` `optional` **dataDeletionPolicy**: [`DataDeletionPolicy`](../enumerations/DataDeletionPolicy.md) + +The data deletion policy to apply to the data source. + +#### Default + +```ts +- Sets the data deletion policy to the default of the data source type. +``` + +#### Inherited from + +[`ConfluenceDataSourceAssociationProps`](ConfluenceDataSourceAssociationProps.md).[`dataDeletionPolicy`](ConfluenceDataSourceAssociationProps.md#datadeletionpolicy) + +*** + +### dataSourceName? + +> `readonly` `optional` **dataSourceName**: `string` + +The name of the data source. + +#### Default + +```ts +- A new name will be generated. 
+``` + +#### Inherited from + +[`ConfluenceDataSourceAssociationProps`](ConfluenceDataSourceAssociationProps.md).[`dataSourceName`](ConfluenceDataSourceAssociationProps.md#datasourcename) + +*** + +### description? + +> `readonly` `optional` **description**: `string` + +A description of the data source. + +#### Default + +```ts +- No description is provided. +``` + +#### Inherited from + +[`ConfluenceDataSourceAssociationProps`](ConfluenceDataSourceAssociationProps.md).[`description`](ConfluenceDataSourceAssociationProps.md#description) + +*** + +### filters? + +> `readonly` `optional` **filters**: [`ConfluenceCrawlingFilters`](ConfluenceCrawlingFilters.md)[] + +The filters (regular expression patterns) for the crawling. +If there's a conflict, the exclude pattern takes precedence. + +#### Default + +```ts +None - all your content is crawled. +``` + +#### Inherited from + +[`ConfluenceDataSourceAssociationProps`](ConfluenceDataSourceAssociationProps.md).[`filters`](ConfluenceDataSourceAssociationProps.md#filters) + +*** + +### kmsKey? + +> `readonly` `optional` **kmsKey**: `IKey` + +The KMS key to use to encrypt the data source. + +#### Default + +```ts +- Service owned and managed key. +``` + +#### Inherited from + +[`ConfluenceDataSourceAssociationProps`](ConfluenceDataSourceAssociationProps.md).[`kmsKey`](ConfluenceDataSourceAssociationProps.md#kmskey) + +*** + +### knowledgeBase + +> `readonly` **knowledgeBase**: [`IKnowledgeBase`](IKnowledgeBase.md) + +The knowledge base to associate with the data source. + +*** + +### parsingStrategy? + +> `readonly` `optional` **parsingStrategy**: [`ParsingStategy`](../classes/ParsingStategy.md) + +The parsing strategy to use. + +#### Default + +```ts +- No Parsing Stategy is used. 
+``` + +#### Inherited from + +[`ConfluenceDataSourceAssociationProps`](ConfluenceDataSourceAssociationProps.md).[`parsingStrategy`](ConfluenceDataSourceAssociationProps.md#parsingstrategy) diff --git a/apidocs/namespaces/bedrock/interfaces/CrawlingFilters.md b/apidocs/namespaces/bedrock/interfaces/CrawlingFilters.md new file mode 100644 index 00000000..5d60d760 --- /dev/null +++ b/apidocs/namespaces/bedrock/interfaces/CrawlingFilters.md @@ -0,0 +1,26 @@ +[**@cdklabs/generative-ai-cdk-constructs**](../../../README.md) • **Docs** + +*** + +[@cdklabs/generative-ai-cdk-constructs](../../../README.md) / [bedrock](../README.md) / CrawlingFilters + +# Interface: CrawlingFilters + +The filters (regular expression patterns) to include or exclude in the crawling +in accordance with your scope. + +## Properties + +### excludePatterns? + +> `readonly` `optional` **excludePatterns**: `string`[] + +Exclude patterns. + +*** + +### includePatterns? + +> `readonly` `optional` **includePatterns**: `string`[] + +Include patterns. diff --git a/apidocs/namespaces/bedrock/interfaces/DataSourceAssociationProps.md b/apidocs/namespaces/bedrock/interfaces/DataSourceAssociationProps.md new file mode 100644 index 00000000..0a675199 --- /dev/null +++ b/apidocs/namespaces/bedrock/interfaces/DataSourceAssociationProps.md @@ -0,0 +1,117 @@ +[**@cdklabs/generative-ai-cdk-constructs**](../../../README.md) • **Docs** + +*** + +[@cdklabs/generative-ai-cdk-constructs](../../../README.md) / [bedrock](../README.md) / DataSourceAssociationProps + +# Interface: DataSourceAssociationProps + +Properties common for creating any of the different data source types.
+ +## Extended by + +- [`WebCrawlerDataSourceAssociationProps`](WebCrawlerDataSourceAssociationProps.md) +- [`SharePointDataSourceAssociationProps`](SharepointDataSourceAssociationProps.md) +- [`ConfluenceDataSourceAssociationProps`](ConfluenceDataSourceAssociationProps.md) +- [`SalesforceDataSourceAssociationProps`](SalesforceDataSourceAssociationProps.md) +- [`S3DataSourceAssociationProps`](S3DataSourceAssociationProps.md) + +## Properties + +### chunkingStrategy? + +> `readonly` `optional` **chunkingStrategy**: [`ChunkingStrategy`](../classes/ChunkingStrategy.md) + +The chunking strategy to use for splitting your documents or content. +The chunks are then converted to embeddings and written to the vector +index allowing for similarity search and retrieval of the content. + +#### Default + +```ts +ChunkingStrategy.DEFAULT +``` + +*** + +### customTransformation? + +> `readonly` `optional` **customTransformation**: [`CustomTransformation`](../classes/CustomTransformation.md) + +The custom transformation strategy to use. + +#### Default + +```ts +- No custom transformation is used. +``` + +*** + +### dataDeletionPolicy? + +> `readonly` `optional` **dataDeletionPolicy**: [`DataDeletionPolicy`](../enumerations/DataDeletionPolicy.md) + +The data deletion policy to apply to the data source. + +#### Default + +```ts +- Sets the data deletion policy to the default of the data source type. +``` + +*** + +### dataSourceName? + +> `readonly` `optional` **dataSourceName**: `string` + +The name of the data source. + +#### Default + +```ts +- A new name will be generated. +``` + +*** + +### description? + +> `readonly` `optional` **description**: `string` + +A description of the data source. + +#### Default + +```ts +- No description is provided. +``` + +*** + +### kmsKey? + +> `readonly` `optional` **kmsKey**: `IKey` + +The KMS key to use to encrypt the data source. + +#### Default + +```ts +- Service owned and managed key. +``` + +*** + +### parsingStrategy?
+ +> `readonly` `optional` **parsingStrategy**: [`ParsingStategy`](../classes/ParsingStategy.md) + +The parsing strategy to use. + +#### Default + +```ts +- No parsing strategy is used. +``` diff --git a/apidocs/namespaces/bedrock/interfaces/FoundationModelParsingStategyProps.md b/apidocs/namespaces/bedrock/interfaces/FoundationModelParsingStategyProps.md new file mode 100644 index 00000000..1a57624c --- /dev/null +++ b/apidocs/namespaces/bedrock/interfaces/FoundationModelParsingStategyProps.md @@ -0,0 +1,32 @@ +[**@cdklabs/generative-ai-cdk-constructs**](../../../README.md) • **Docs** + +*** + +[@cdklabs/generative-ai-cdk-constructs](../../../README.md) / [bedrock](../README.md) / FoundationModelParsingStategyProps + +# Interface: FoundationModelParsingStategyProps + +Properties for configuring a Foundation Model parsing strategy. + +## Properties + +### parsingModel + +> `readonly` **parsingModel**: `IModel` + +The Foundation Model to use for parsing non-textual information. +Currently supported models are Claude 3 Sonnet and Claude 3 Haiku. + +*** + +### parsingPrompt? + +> `readonly` `optional` **parsingPrompt**: `string` + +Custom prompt to instruct the parser on how to interpret the document. + +#### Default + +```ts +- Uses the default instruction prompt as provided in the AWS Console.
+``` diff --git a/apidocs/namespaces/bedrock/interfaces/HierarchicalChunkingProps.md b/apidocs/namespaces/bedrock/interfaces/HierarchicalChunkingProps.md new file mode 100644 index 00000000..dd8ea710 --- /dev/null +++ b/apidocs/namespaces/bedrock/interfaces/HierarchicalChunkingProps.md @@ -0,0 +1,33 @@ +[**@cdklabs/generative-ai-cdk-constructs**](../../../README.md) • **Docs** + +*** + +[@cdklabs/generative-ai-cdk-constructs](../../../README.md) / [bedrock](../README.md) / HierarchicalChunkingProps + +# Interface: HierarchicalChunkingProps + +## Properties + +### maxChildTokenSize + +> `readonly` **maxChildTokenSize**: `number` + +Maximum number of tokens that a child chunk can contain. +Keep in mind the maximum chunk size depends on the embedding model chosen. + +*** + +### maxParentTokenSize + +> `readonly` **maxParentTokenSize**: `number` + +Maximum number of tokens that a parent chunk can contain. +Keep in mind the maximum chunk size depends on the embedding model chosen. + +*** + +### overlapTokens + +> `readonly` **overlapTokens**: `number` + +The overlap tokens between adjacent chunks. diff --git a/apidocs/namespaces/bedrock/interfaces/IDataSource.md b/apidocs/namespaces/bedrock/interfaces/IDataSource.md new file mode 100644 index 00000000..a03d0df0 --- /dev/null +++ b/apidocs/namespaces/bedrock/interfaces/IDataSource.md @@ -0,0 +1,97 @@ +[**@cdklabs/generative-ai-cdk-constructs**](../../../README.md) • **Docs** + +*** + +[@cdklabs/generative-ai-cdk-constructs](../../../README.md) / [bedrock](../README.md) / IDataSource + +# Interface: IDataSource + +Specifies interface for resources created with CDK or imported into CDK. + +## Extends + +- `IResource` + +## Properties + +### dataSourceId + +> `readonly` **dataSourceId**: `string` + +The unique identifier of the data source. + +#### Example + +```ts +'JHUEVXUZMU' +``` + +*** + +### env + +> `readonly` **env**: `ResourceEnvironment` + +The environment this resource belongs to. 
+For resources that are created and managed by the CDK +(generally, those created by creating new class instances like Role, Bucket, etc.), +this is always the same as the environment of the stack they belong to; +however, for imported resources +(those obtained from static methods like fromRoleArn, fromBucketName, etc.), +that might be different than the stack they were imported into. + +#### Inherited from + +`IResource.env` + +*** + +### node + +> `readonly` **node**: `Node` + +The tree node. + +#### Inherited from + +`IResource.node` + +*** + +### stack + +> `readonly` **stack**: `Stack` + +The stack in which this resource is defined. + +#### Inherited from + +`IResource.stack` + +## Methods + +### applyRemovalPolicy() + +> **applyRemovalPolicy**(`policy`): `void` + +Apply the given removal policy to this resource + +The Removal Policy controls what happens to this resource when it stops +being managed by CloudFormation, either because you've removed it from the +CDK application or because you've made a change that requires the resource +to be replaced. + +The resource can be deleted (`RemovalPolicy.DESTROY`), or left in your AWS +account for data recovery and cleanup later (`RemovalPolicy.RETAIN`). + +#### Parameters + +• **policy**: `RemovalPolicy` + +#### Returns + +`void` + +#### Inherited from + +`IResource.applyRemovalPolicy` diff --git a/apidocs/namespaces/bedrock/interfaces/IKnowledgeBase.md b/apidocs/namespaces/bedrock/interfaces/IKnowledgeBase.md new file mode 100644 index 00000000..a2276e48 --- /dev/null +++ b/apidocs/namespaces/bedrock/interfaces/IKnowledgeBase.md @@ -0,0 +1,199 @@ +[**@cdklabs/generative-ai-cdk-constructs**](../../../README.md) • **Docs** + +*** + +[@cdklabs/generative-ai-cdk-constructs](../../../README.md) / [bedrock](../README.md) / IKnowledgeBase + +# Interface: IKnowledgeBase + +Represents a Knowledge Base, either created with CDK or imported. 
+ +## Extends + +- `IResource` + +## Properties + +### env + +> `readonly` **env**: `ResourceEnvironment` + +The environment this resource belongs to. +For resources that are created and managed by the CDK +(generally, those created by creating new class instances like Role, Bucket, etc.), +this is always the same as the environment of the stack they belong to; +however, for imported resources +(those obtained from static methods like fromRoleArn, fromBucketName, etc.), +that might be different than the stack they were imported into. + +#### Inherited from + +`IResource.env` + +*** + +### knowledgeBaseArn + +> `readonly` **knowledgeBaseArn**: `string` + +The ARN of the knowledge base. + +#### Example + +```ts +"arn:aws:bedrock:us-east-1:123456789012:knowledge-base/KB12345678" +``` + +*** + +### knowledgeBaseId + +> `readonly` **knowledgeBaseId**: `string` + +The ID of the knowledge base. + +#### Example + +```ts +"KB12345678" +``` + +*** + +### node + +> `readonly` **node**: `Node` + +The tree node. + +#### Inherited from + +`IResource.node` + +*** + +### role + +> `readonly` **role**: `IRole` + +The role associated with the knowledge base. + +*** + +### stack + +> `readonly` **stack**: `Stack` + +The stack in which this resource is defined. + +#### Inherited from + +`IResource.stack` + +## Methods + +### addConfluenceDataSource() + +> **addConfluenceDataSource**(`props`): [`ConfluenceDataSource`](../classes/ConfluenceDataSource.md) + +Add a Confluence data source to the knowledge base. + +#### Parameters + +• **props**: [`ConfluenceDataSourceAssociationProps`](ConfluenceDataSourceAssociationProps.md) + +#### Returns + +[`ConfluenceDataSource`](../classes/ConfluenceDataSource.md) + +*** + +### addS3DataSource() + +> **addS3DataSource**(`props`): [`S3DataSource`](../classes/S3DataSource.md) + +Add an S3 data source to the knowledge base. 
+ +#### Parameters + +• **props**: [`S3DataSourceAssociationProps`](S3DataSourceAssociationProps.md) + +#### Returns + +[`S3DataSource`](../classes/S3DataSource.md) + +*** + +### addSalesforceDataSource() + +> **addSalesforceDataSource**(`props`): [`SalesforceDataSource`](../classes/SalesforceDataSource.md) + +Add a Salesforce data source to the knowledge base. + +#### Parameters + +• **props**: [`SalesforceDataSourceAssociationProps`](SalesforceDataSourceAssociationProps.md) + +#### Returns + +[`SalesforceDataSource`](../classes/SalesforceDataSource.md) + +*** + +### addSharePointDataSource() + +> **addSharePointDataSource**(`props`): [`SharePointDataSource`](../classes/SharepointDataSource.md) + +Add a SharePoint data source to the knowledge base. + +#### Parameters + +• **props**: [`SharePointDataSourceAssociationProps`](SharepointDataSourceAssociationProps.md) + +#### Returns + +[`SharePointDataSource`](../classes/SharepointDataSource.md) + +*** + +### addWebCrawlerDataSource() + +> **addWebCrawlerDataSource**(`props`): [`WebCrawlerDataSource`](../classes/WebCrawlerDataSource.md) + +Add a web crawler data source to the knowledge base. + +#### Parameters + +• **props**: [`WebCrawlerDataSourceAssociationProps`](WebCrawlerDataSourceAssociationProps.md) + +#### Returns + +[`WebCrawlerDataSource`](../classes/WebCrawlerDataSource.md) + +*** + +### applyRemovalPolicy() + +> **applyRemovalPolicy**(`policy`): `void` + +Apply the given removal policy to this resource + +The Removal Policy controls what happens to this resource when it stops +being managed by CloudFormation, either because you've removed it from the +CDK application or because you've made a change that requires the resource +to be replaced. + +The resource can be deleted (`RemovalPolicy.DESTROY`), or left in your AWS +account for data recovery and cleanup later (`RemovalPolicy.RETAIN`).
+ +#### Parameters + +• **policy**: `RemovalPolicy` + +#### Returns + +`void` + +#### Inherited from + +`IResource.applyRemovalPolicy` diff --git a/apidocs/namespaces/bedrock/interfaces/KnowledgeBaseAttributes.md b/apidocs/namespaces/bedrock/interfaces/KnowledgeBaseAttributes.md new file mode 100644 index 00000000..e596226d --- /dev/null +++ b/apidocs/namespaces/bedrock/interfaces/KnowledgeBaseAttributes.md @@ -0,0 +1,37 @@ +[**@cdklabs/generative-ai-cdk-constructs**](../../../README.md) • **Docs** + +*** + +[@cdklabs/generative-ai-cdk-constructs](../../../README.md) / [bedrock](../README.md) / KnowledgeBaseAttributes + +# Interface: KnowledgeBaseAttributes + +Properties for importing a knowledge base outside of this stack + +## Properties + +### executionRoleArn + +> `readonly` **executionRoleArn**: `string` + +The Service Execution Role associated with the knowledge base. + +#### Example + +```ts +"arn:aws:iam::123456789012:role/AmazonBedrockExecutionRoleForKnowledgeBaseawscdkbdgeBaseKB12345678" +``` + +*** + +### knowledgeBaseId + +> `readonly` **knowledgeBaseId**: `string` + +The ID of the knowledge base. + +#### Example + +```ts +"KB12345678" +``` diff --git a/apidocs/namespaces/bedrock/interfaces/KnowledgeBaseProps.md b/apidocs/namespaces/bedrock/interfaces/KnowledgeBaseProps.md index d43cd911..de39ade4 100644 --- a/apidocs/namespaces/bedrock/interfaces/KnowledgeBaseProps.md +++ b/apidocs/namespaces/bedrock/interfaces/KnowledgeBaseProps.md @@ -34,7 +34,7 @@ The embeddings model for the knowledge base ### existingRole? -> `readonly` `optional` **existingRole**: `Role` +> `readonly` `optional` **existingRole**: `IRole` Existing IAM role with a policy statement granting permission to invoke the specific embeddings model. 
diff --git a/apidocs/namespaces/bedrock/interfaces/LambdaCustomTransformationProps.md b/apidocs/namespaces/bedrock/interfaces/LambdaCustomTransformationProps.md new file mode 100644 index 00000000..4338e940 --- /dev/null +++ b/apidocs/namespaces/bedrock/interfaces/LambdaCustomTransformationProps.md @@ -0,0 +1,32 @@ +[**@cdklabs/generative-ai-cdk-constructs**](../../../README.md) • **Docs** + +*** + +[@cdklabs/generative-ai-cdk-constructs](../../../README.md) / [bedrock](../README.md) / LambdaCustomTransformationProps + +# Interface: LambdaCustomTransformationProps + +Properties for configuring a Lambda-based custom transformation. + +## Properties + +### lambdaFunction + +> `readonly` **lambdaFunction**: `IFunction` + +The Lambda function to use for custom document processing. + +*** + +### s3BucketUri + +> `readonly` **s3BucketUri**: `string` + +An S3 bucket URL/path to store input documents for Lambda processing +and to store the output of the processed documents. + +#### Example + +```ts +"s3://my-bucket/chunk-processor/" +``` diff --git a/apidocs/namespaces/bedrock/interfaces/S3DataSourceAssociationProps.md b/apidocs/namespaces/bedrock/interfaces/S3DataSourceAssociationProps.md new file mode 100644 index 00000000..ae6d0b4d --- /dev/null +++ b/apidocs/namespaces/bedrock/interfaces/S3DataSourceAssociationProps.md @@ -0,0 +1,167 @@ +[**@cdklabs/generative-ai-cdk-constructs**](../../../README.md) • **Docs** + +*** + +[@cdklabs/generative-ai-cdk-constructs](../../../README.md) / [bedrock](../README.md) / S3DataSourceAssociationProps + +# Interface: S3DataSourceAssociationProps + +Interface to add a new S3DataSource to an existing KB + +## Extends + +- [`DataSourceAssociationProps`](DataSourceAssociationProps.md) + +## Extended by + +- [`S3DataSourceProps`](S3DataSourceProps.md) + +## Properties + +### bucket + +> `readonly` **bucket**: `IBucket` + +The bucket that contains the data source. + +*** + +### chunkingStrategy? 
+ +> `readonly` `optional` **chunkingStrategy**: [`ChunkingStrategy`](../classes/ChunkingStrategy.md) + +The chunking stategy to use for splitting your documents or content. +The chunks are then converted to embeddings and written to the vector +index allowing for similarity search and retrieval of the content. + +#### Default + +```ts +ChunkingStrategy.DEFAULT +``` + +#### Inherited from + +[`DataSourceAssociationProps`](DataSourceAssociationProps.md).[`chunkingStrategy`](DataSourceAssociationProps.md#chunkingstrategy) + +*** + +### customTransformation? + +> `readonly` `optional` **customTransformation**: [`CustomTransformation`](../classes/CustomTransformation.md) + +The custom transformation strategy to use. + +#### Default + +```ts +- No custom transformation is used. +``` + +#### Inherited from + +[`DataSourceAssociationProps`](DataSourceAssociationProps.md).[`customTransformation`](DataSourceAssociationProps.md#customtransformation) + +*** + +### dataDeletionPolicy? + +> `readonly` `optional` **dataDeletionPolicy**: [`DataDeletionPolicy`](../enumerations/DataDeletionPolicy.md) + +The data deletion policy to apply to the data source. + +#### Default + +```ts +- Sets the data deletion policy to the default of the data source type. +``` + +#### Inherited from + +[`DataSourceAssociationProps`](DataSourceAssociationProps.md).[`dataDeletionPolicy`](DataSourceAssociationProps.md#datadeletionpolicy) + +*** + +### dataSourceName? + +> `readonly` `optional` **dataSourceName**: `string` + +The name of the data source. + +#### Default + +```ts +- A new name will be generated. +``` + +#### Inherited from + +[`DataSourceAssociationProps`](DataSourceAssociationProps.md).[`dataSourceName`](DataSourceAssociationProps.md#datasourcename) + +*** + +### description? + +> `readonly` `optional` **description**: `string` + +A description of the data source. + +#### Default + +```ts +- No description is provided. 
+``` + +#### Inherited from + +[`DataSourceAssociationProps`](DataSourceAssociationProps.md).[`description`](DataSourceAssociationProps.md#description) + +*** + +### inclusionPrefixes? + +> `readonly` `optional` **inclusionPrefixes**: `string`[] + +The prefixes of the objects in the bucket that should be included in the data source. + +#### Default + +```ts +- All objects in the bucket. +``` + +*** + +### kmsKey? + +> `readonly` `optional` **kmsKey**: `IKey` + +The KMS key to use to encrypt the data source. + +#### Default + +```ts +- Service owned and managed key. +``` + +#### Inherited from + +[`DataSourceAssociationProps`](DataSourceAssociationProps.md).[`kmsKey`](DataSourceAssociationProps.md#kmskey) + +*** + +### parsingStrategy? + +> `readonly` `optional` **parsingStrategy**: [`ParsingStategy`](../classes/ParsingStategy.md) + +The parsing strategy to use. + +#### Default + +```ts +- No Parsing Stategy is used. +``` + +#### Inherited from + +[`DataSourceAssociationProps`](DataSourceAssociationProps.md).[`parsingStrategy`](DataSourceAssociationProps.md#parsingstrategy) diff --git a/apidocs/namespaces/bedrock/interfaces/S3DataSourceProps.md b/apidocs/namespaces/bedrock/interfaces/S3DataSourceProps.md index e2839ae7..ebbf00a7 100644 --- a/apidocs/namespaces/bedrock/interfaces/S3DataSourceProps.md +++ b/apidocs/namespaces/bedrock/interfaces/S3DataSourceProps.md @@ -6,7 +6,11 @@ # Interface: S3DataSourceProps -Properties for an S3 Data Source. +Interface to create a new S3 Data Source object. + +## Extends + +- [`S3DataSourceAssociationProps`](S3DataSourceAssociationProps.md) ## Properties @@ -16,13 +20,19 @@ Properties for an S3 Data Source. The bucket that contains the data source. +#### Inherited from + +[`S3DataSourceAssociationProps`](S3DataSourceAssociationProps.md).[`bucket`](S3DataSourceAssociationProps.md#bucket) + *** ### chunkingStrategy? 
-> `readonly` `optional` **chunkingStrategy**: [`ChunkingStrategy`](../enumerations/ChunkingStrategy.md) +> `readonly` `optional` **chunkingStrategy**: [`ChunkingStrategy`](../classes/ChunkingStrategy.md) -The chunking strategy to use. +The chunking stategy to use for splitting your documents or content. +The chunks are then converted to embeddings and written to the vector +index allowing for similarity search and retrieval of the content. #### Default @@ -30,14 +40,82 @@ The chunking strategy to use. ChunkingStrategy.DEFAULT ``` +#### Inherited from + +[`S3DataSourceAssociationProps`](S3DataSourceAssociationProps.md).[`chunkingStrategy`](S3DataSourceAssociationProps.md#chunkingstrategy) + *** -### dataSourceName +### customTransformation? + +> `readonly` `optional` **customTransformation**: [`CustomTransformation`](../classes/CustomTransformation.md) + +The custom transformation strategy to use. -> `readonly` **dataSourceName**: `string` +#### Default + +```ts +- No custom transformation is used. +``` + +#### Inherited from + +[`S3DataSourceAssociationProps`](S3DataSourceAssociationProps.md).[`customTransformation`](S3DataSourceAssociationProps.md#customtransformation) + +*** + +### dataDeletionPolicy? + +> `readonly` `optional` **dataDeletionPolicy**: [`DataDeletionPolicy`](../enumerations/DataDeletionPolicy.md) + +The data deletion policy to apply to the data source. + +#### Default + +```ts +- Sets the data deletion policy to the default of the data source type. +``` + +#### Inherited from + +[`S3DataSourceAssociationProps`](S3DataSourceAssociationProps.md).[`dataDeletionPolicy`](S3DataSourceAssociationProps.md#datadeletionpolicy) + +*** + +### dataSourceName? + +> `readonly` `optional` **dataSourceName**: `string` The name of the data source. +#### Default + +```ts +- A new name will be generated. 
+``` + +#### Inherited from + +[`S3DataSourceAssociationProps`](S3DataSourceAssociationProps.md).[`dataSourceName`](S3DataSourceAssociationProps.md#datasourcename) + +*** + +### description? + +> `readonly` `optional` **description**: `string` + +A description of the data source. + +#### Default + +```ts +- No description is provided. +``` + +#### Inherited from + +[`S3DataSourceAssociationProps`](S3DataSourceAssociationProps.md).[`description`](S3DataSourceAssociationProps.md#description) + *** ### inclusionPrefixes? @@ -52,6 +130,10 @@ The prefixes of the objects in the bucket that should be included in the data so - All objects in the bucket. ``` +#### Inherited from + +[`S3DataSourceAssociationProps`](S3DataSourceAssociationProps.md).[`inclusionPrefixes`](S3DataSourceAssociationProps.md#inclusionprefixes) + *** ### kmsKey? @@ -63,41 +145,35 @@ The KMS key to use to encrypt the data source. #### Default ```ts -Amazon Bedrock encrypts your data with a key that AWS owns and manages +- Service owned and managed key. ``` +#### Inherited from + +[`S3DataSourceAssociationProps`](S3DataSourceAssociationProps.md).[`kmsKey`](S3DataSourceAssociationProps.md#kmskey) + *** ### knowledgeBase -> `readonly` **knowledgeBase**: [`KnowledgeBase`](../classes/KnowledgeBase.md) +> `readonly` **knowledgeBase**: [`IKnowledgeBase`](IKnowledgeBase.md) -The knowledge base that this data source belongs to. +The knowledge base to associate with the data source. *** -### maxTokens? +### parsingStrategy? -> `readonly` `optional` **maxTokens**: `number` +> `readonly` `optional` **parsingStrategy**: [`ParsingStategy`](../classes/ParsingStategy.md) -The maximum number of tokens to use in a chunk. +The parsing strategy to use. #### Default ```ts -300 +- No Parsing Stategy is used. ``` -*** +#### Inherited from -### overlapPercentage? - -> `readonly` `optional` **overlapPercentage**: `number` - -The percentage of overlap to use in a chunk. 
- -#### Default - -```ts -20 -``` +[`S3DataSourceAssociationProps`](S3DataSourceAssociationProps.md).[`parsingStrategy`](S3DataSourceAssociationProps.md#parsingstrategy) diff --git a/apidocs/namespaces/bedrock/interfaces/SalesforceCrawlingFilters.md b/apidocs/namespaces/bedrock/interfaces/SalesforceCrawlingFilters.md new file mode 100644 index 00000000..cf7c80d1 --- /dev/null +++ b/apidocs/namespaces/bedrock/interfaces/SalesforceCrawlingFilters.md @@ -0,0 +1,33 @@ +[**@cdklabs/generative-ai-cdk-constructs**](../../../README.md) • **Docs** + +*** + +[@cdklabs/generative-ai-cdk-constructs](../../../README.md) / [bedrock](../README.md) / SalesforceCrawlingFilters + +# Interface: SalesforceCrawlingFilters + +Defines the crawling filters for Salesforce data ingestion. + +## Properties + +### excludePatterns? + +> `readonly` `optional` **excludePatterns**: `string`[] + +Regular expression patterns to exclude specific content. + +*** + +### includePatterns? + +> `readonly` `optional` **includePatterns**: `string`[] + +Regular expression patterns to include specific content. + +*** + +### objectType + +> `readonly` **objectType**: [`SalesforceObjectType`](../enumerations/SalesforceObjectType.md) + +The Salesforce object type to which this filter applies. diff --git a/apidocs/namespaces/bedrock/interfaces/SalesforceDataSourceAssociationProps.md b/apidocs/namespaces/bedrock/interfaces/SalesforceDataSourceAssociationProps.md new file mode 100644 index 00000000..0186bf79 --- /dev/null +++ b/apidocs/namespaces/bedrock/interfaces/SalesforceDataSourceAssociationProps.md @@ -0,0 +1,183 @@ +[**@cdklabs/generative-ai-cdk-constructs**](../../../README.md) • **Docs** + +*** + +[@cdklabs/generative-ai-cdk-constructs](../../../README.md) / [bedrock](../README.md) / SalesforceDataSourceAssociationProps + +# Interface: SalesforceDataSourceAssociationProps + +Interface to add a new data source to an existing KB. 
+ +## Extends + +- [`DataSourceAssociationProps`](DataSourceAssociationProps.md) + +## Extended by + +- [`SalesforceDataSourceProps`](SalesforceDataSourceProps.md) + +## Properties + +### authSecret + +> `readonly` **authSecret**: `ISecret` + +The AWS Secrets Manager secret that stores your authentication credentials +for your Salesforce instance URL. Secret must start with "AmazonBedrock-". + +*** + +### chunkingStrategy? + +> `readonly` `optional` **chunkingStrategy**: [`ChunkingStrategy`](../classes/ChunkingStrategy.md) + +The chunking stategy to use for splitting your documents or content. +The chunks are then converted to embeddings and written to the vector +index allowing for similarity search and retrieval of the content. + +#### Default + +```ts +ChunkingStrategy.DEFAULT +``` + +#### Inherited from + +[`DataSourceAssociationProps`](DataSourceAssociationProps.md).[`chunkingStrategy`](DataSourceAssociationProps.md#chunkingstrategy) + +*** + +### customTransformation? + +> `readonly` `optional` **customTransformation**: [`CustomTransformation`](../classes/CustomTransformation.md) + +The custom transformation strategy to use. + +#### Default + +```ts +- No custom transformation is used. +``` + +#### Inherited from + +[`DataSourceAssociationProps`](DataSourceAssociationProps.md).[`customTransformation`](DataSourceAssociationProps.md#customtransformation) + +*** + +### dataDeletionPolicy? + +> `readonly` `optional` **dataDeletionPolicy**: [`DataDeletionPolicy`](../enumerations/DataDeletionPolicy.md) + +The data deletion policy to apply to the data source. + +#### Default + +```ts +- Sets the data deletion policy to the default of the data source type. +``` + +#### Inherited from + +[`DataSourceAssociationProps`](DataSourceAssociationProps.md).[`dataDeletionPolicy`](DataSourceAssociationProps.md#datadeletionpolicy) + +*** + +### dataSourceName? + +> `readonly` `optional` **dataSourceName**: `string` + +The name of the data source. 
+ +#### Default + +```ts +- A new name will be generated. +``` + +#### Inherited from + +[`DataSourceAssociationProps`](DataSourceAssociationProps.md).[`dataSourceName`](DataSourceAssociationProps.md#datasourcename) + +*** + +### description? + +> `readonly` `optional` **description**: `string` + +A description of the data source. + +#### Default + +```ts +- No description is provided. +``` + +#### Inherited from + +[`DataSourceAssociationProps`](DataSourceAssociationProps.md).[`description`](DataSourceAssociationProps.md#description) + +*** + +### endpoint + +> `readonly` **endpoint**: `string` + +The Salesforce host URL or instance URL. + +#### Example + +```ts +"https://company.salesforce.com/" +``` + +*** + +### filters? + +> `readonly` `optional` **filters**: [`SalesforceCrawlingFilters`](SalesforceCrawlingFilters.md)[] + +The filters (regular expression patterns) for the crawling. +If there's a conflict, the exclude pattern takes precedence. + +#### Default + +```ts +None - all your content is crawled. +``` + +*** + +### kmsKey? + +> `readonly` `optional` **kmsKey**: `IKey` + +The KMS key to use to encrypt the data source. + +#### Default + +```ts +- Service owned and managed key. +``` + +#### Inherited from + +[`DataSourceAssociationProps`](DataSourceAssociationProps.md).[`kmsKey`](DataSourceAssociationProps.md#kmskey) + +*** + +### parsingStrategy? + +> `readonly` `optional` **parsingStrategy**: [`ParsingStategy`](../classes/ParsingStategy.md) + +The parsing strategy to use. + +#### Default + +```ts +- No Parsing Stategy is used. 
+``` + +#### Inherited from + +[`DataSourceAssociationProps`](DataSourceAssociationProps.md).[`parsingStrategy`](DataSourceAssociationProps.md#parsingstrategy) diff --git a/apidocs/namespaces/bedrock/interfaces/SalesforceDataSourceProps.md b/apidocs/namespaces/bedrock/interfaces/SalesforceDataSourceProps.md new file mode 100644 index 00000000..fb6a59ea --- /dev/null +++ b/apidocs/namespaces/bedrock/interfaces/SalesforceDataSourceProps.md @@ -0,0 +1,199 @@ +[**@cdklabs/generative-ai-cdk-constructs**](../../../README.md) • **Docs** + +*** + +[@cdklabs/generative-ai-cdk-constructs](../../../README.md) / [bedrock](../README.md) / SalesforceDataSourceProps + +# Interface: SalesforceDataSourceProps + +Interface to create a new standalone data source object. + +## Extends + +- [`SalesforceDataSourceAssociationProps`](SalesforceDataSourceAssociationProps.md) + +## Properties + +### authSecret + +> `readonly` **authSecret**: `ISecret` + +The AWS Secrets Manager secret that stores your authentication credentials +for your Salesforce instance URL. Secret must start with "AmazonBedrock-". + +#### Inherited from + +[`SalesforceDataSourceAssociationProps`](SalesforceDataSourceAssociationProps.md).[`authSecret`](SalesforceDataSourceAssociationProps.md#authsecret) + +*** + +### chunkingStrategy? + +> `readonly` `optional` **chunkingStrategy**: [`ChunkingStrategy`](../classes/ChunkingStrategy.md) + +The chunking strategy to use for splitting your documents or content. +The chunks are then converted to embeddings and written to the vector +index allowing for similarity search and retrieval of the content. + +#### Default + +```ts +ChunkingStrategy.DEFAULT +``` + +#### Inherited from + +[`SalesforceDataSourceAssociationProps`](SalesforceDataSourceAssociationProps.md).[`chunkingStrategy`](SalesforceDataSourceAssociationProps.md#chunkingstrategy) + +*** + +### customTransformation? 
+ +> `readonly` `optional` **customTransformation**: [`CustomTransformation`](../classes/CustomTransformation.md) + +The custom transformation strategy to use. + +#### Default + +```ts +- No custom transformation is used. +``` + +#### Inherited from + +[`SalesforceDataSourceAssociationProps`](SalesforceDataSourceAssociationProps.md).[`customTransformation`](SalesforceDataSourceAssociationProps.md#customtransformation) + +*** + +### dataDeletionPolicy? + +> `readonly` `optional` **dataDeletionPolicy**: [`DataDeletionPolicy`](../enumerations/DataDeletionPolicy.md) + +The data deletion policy to apply to the data source. + +#### Default + +```ts +- Sets the data deletion policy to the default of the data source type. +``` + +#### Inherited from + +[`SalesforceDataSourceAssociationProps`](SalesforceDataSourceAssociationProps.md).[`dataDeletionPolicy`](SalesforceDataSourceAssociationProps.md#datadeletionpolicy) + +*** + +### dataSourceName? + +> `readonly` `optional` **dataSourceName**: `string` + +The name of the data source. + +#### Default + +```ts +- A new name will be generated. +``` + +#### Inherited from + +[`SalesforceDataSourceAssociationProps`](SalesforceDataSourceAssociationProps.md).[`dataSourceName`](SalesforceDataSourceAssociationProps.md#datasourcename) + +*** + +### description? + +> `readonly` `optional` **description**: `string` + +A description of the data source. + +#### Default + +```ts +- No description is provided. +``` + +#### Inherited from + +[`SalesforceDataSourceAssociationProps`](SalesforceDataSourceAssociationProps.md).[`description`](SalesforceDataSourceAssociationProps.md#description) + +*** + +### endpoint + +> `readonly` **endpoint**: `string` + +The Salesforce host URL or instance URL. 
+ +#### Example + +```ts +"https://company.salesforce.com/" +``` + +#### Inherited from + +[`SalesforceDataSourceAssociationProps`](SalesforceDataSourceAssociationProps.md).[`endpoint`](SalesforceDataSourceAssociationProps.md#endpoint) + +*** + +### filters? + +> `readonly` `optional` **filters**: [`SalesforceCrawlingFilters`](SalesforceCrawlingFilters.md)[] + +The filters (regular expression patterns) for the crawling. +If there's a conflict, the exclude pattern takes precedence. + +#### Default + +```ts +None - all your content is crawled. +``` + +#### Inherited from + +[`SalesforceDataSourceAssociationProps`](SalesforceDataSourceAssociationProps.md).[`filters`](SalesforceDataSourceAssociationProps.md#filters) + +*** + +### kmsKey? + +> `readonly` `optional` **kmsKey**: `IKey` + +The KMS key to use to encrypt the data source. + +#### Default + +```ts +- Service owned and managed key. +``` + +#### Inherited from + +[`SalesforceDataSourceAssociationProps`](SalesforceDataSourceAssociationProps.md).[`kmsKey`](SalesforceDataSourceAssociationProps.md#kmskey) + +*** + +### knowledgeBase + +> `readonly` **knowledgeBase**: [`IKnowledgeBase`](IKnowledgeBase.md) + +The knowledge base to associate with the data source. + +*** + +### parsingStrategy? + +> `readonly` `optional` **parsingStrategy**: [`ParsingStategy`](../classes/ParsingStategy.md) + +The parsing strategy to use. + +#### Default + +```ts +- No Parsing Stategy is used. 
+``` + +#### Inherited from + +[`SalesforceDataSourceAssociationProps`](SalesforceDataSourceAssociationProps.md).[`parsingStrategy`](SalesforceDataSourceAssociationProps.md#parsingstrategy) diff --git a/apidocs/namespaces/bedrock/interfaces/SharepointCrawlingFilters.md b/apidocs/namespaces/bedrock/interfaces/SharepointCrawlingFilters.md new file mode 100644 index 00000000..f2e472df --- /dev/null +++ b/apidocs/namespaces/bedrock/interfaces/SharepointCrawlingFilters.md @@ -0,0 +1,50 @@ +[**@cdklabs/generative-ai-cdk-constructs**](../../../README.md) • **Docs** + +*** + +[@cdklabs/generative-ai-cdk-constructs](../../../README.md) / [bedrock](../README.md) / SharePointCrawlingFilters + +# Interface: SharePointCrawlingFilters + +Defines the crawling filters for SharePoint data ingestion. These filters allow +you to specify which content should be included or excluded during the crawling process. +If you specify an inclusion and exclusion filter and both match a document, +the exclusion filter takes precedence and the document isn’t crawled. + +## Properties + +### excludePatterns? + +> `readonly` `optional` **excludePatterns**: `string`[] + +Optional array of regular expression patterns to exclude specific content. +Content matching these patterns will be skipped during crawling. + +#### Example + +```ts +['.*private.*', '.*confidential.*'] +``` + +*** + +### includePatterns? + +> `readonly` `optional` **includePatterns**: `string`[] + +Optional array of regular expression patterns to include specific content. +Only content matching these patterns will be crawled. + +#### Example + +```ts +['.*public.*', '.*shared.*'] +``` + +*** + +### objectType + +> `readonly` **objectType**: [`SharePointObjectType`](../enumerations/SharePointObjectType.md) + +The SharePoint object type this filter applies to. 
diff --git a/apidocs/namespaces/bedrock/interfaces/SharepointDataSourceAssociationProps.md b/apidocs/namespaces/bedrock/interfaces/SharepointDataSourceAssociationProps.md new file mode 100644 index 00000000..7f50f831 --- /dev/null +++ b/apidocs/namespaces/bedrock/interfaces/SharepointDataSourceAssociationProps.md @@ -0,0 +1,212 @@ +[**@cdklabs/generative-ai-cdk-constructs**](../../../README.md) • **Docs** + +*** + +[@cdklabs/generative-ai-cdk-constructs](../../../README.md) / [bedrock](../README.md) / SharePointDataSourceAssociationProps + +# Interface: SharePointDataSourceAssociationProps + +Interface to add a new data source to an existing KB. + +## Extends + +- [`DataSourceAssociationProps`](DataSourceAssociationProps.md) + +## Extended by + +- [`SharePointDataSourceProps`](SharePointDataSourceProps.md) + +## Properties + +### authSecret + +> `readonly` **authSecret**: `ISecret` + +The AWS Secrets Manager secret that stores your authentication credentials +for your SharePoint instance URL. Secret must start with "AmazonBedrock-". + +*** + +### chunkingStrategy? + +> `readonly` `optional` **chunkingStrategy**: [`ChunkingStrategy`](../classes/ChunkingStrategy.md) + +The chunking strategy to use for splitting your documents or content. +The chunks are then converted to embeddings and written to the vector +index allowing for similarity search and retrieval of the content. + +#### Default + +```ts +ChunkingStrategy.DEFAULT +``` + +#### Inherited from + +[`DataSourceAssociationProps`](DataSourceAssociationProps.md).[`chunkingStrategy`](DataSourceAssociationProps.md#chunkingstrategy) + +*** + +### customTransformation? + +> `readonly` `optional` **customTransformation**: [`CustomTransformation`](../classes/CustomTransformation.md) + +The custom transformation strategy to use. + +#### Default + +```ts +- No custom transformation is used. 
+``` + +#### Inherited from + +[`DataSourceAssociationProps`](DataSourceAssociationProps.md).[`customTransformation`](DataSourceAssociationProps.md#customtransformation) + +*** + +### dataDeletionPolicy? + +> `readonly` `optional` **dataDeletionPolicy**: [`DataDeletionPolicy`](../enumerations/DataDeletionPolicy.md) + +The data deletion policy to apply to the data source. + +#### Default + +```ts +- Sets the data deletion policy to the default of the data source type. +``` + +#### Inherited from + +[`DataSourceAssociationProps`](DataSourceAssociationProps.md).[`dataDeletionPolicy`](DataSourceAssociationProps.md#datadeletionpolicy) + +*** + +### dataSourceName? + +> `readonly` `optional` **dataSourceName**: `string` + +The name of the data source. + +#### Default + +```ts +- A new name will be generated. +``` + +#### Inherited from + +[`DataSourceAssociationProps`](DataSourceAssociationProps.md).[`dataSourceName`](DataSourceAssociationProps.md#datasourcename) + +*** + +### description? + +> `readonly` `optional` **description**: `string` + +A description of the data source. + +#### Default + +```ts +- No description is provided. +``` + +#### Inherited from + +[`DataSourceAssociationProps`](DataSourceAssociationProps.md).[`description`](DataSourceAssociationProps.md#description) + +*** + +### domain + +> `readonly` **domain**: `string` + +The domain of your SharePoint instance or site URL/URLs. + +#### Example + +```ts +"yourdomain" +``` + +*** + +### filters? + +> `readonly` `optional` **filters**: [`SharePointCrawlingFilters`](SharePointCrawlingFilters.md)[] + +The filters (regular expression patterns) for the crawling. +If there's a conflict, the exclude pattern takes precedence. + +#### Default + +```ts +None - all your content is crawled. +``` + +*** + +### kmsKey? + +> `readonly` `optional` **kmsKey**: `IKey` + +The KMS key to use to encrypt the data source. + +#### Default + +```ts +- Service owned and managed key. 
+``` + +#### Inherited from + +[`DataSourceAssociationProps`](DataSourceAssociationProps.md).[`kmsKey`](DataSourceAssociationProps.md#kmskey) + +*** + +### parsingStrategy? + +> `readonly` `optional` **parsingStrategy**: [`ParsingStategy`](../classes/ParsingStategy.md) + +The parsing strategy to use. + +#### Default + +```ts +- No Parsing Stategy is used. +``` + +#### Inherited from + +[`DataSourceAssociationProps`](DataSourceAssociationProps.md).[`parsingStrategy`](DataSourceAssociationProps.md#parsingstrategy) + +*** + +### siteUrls + +> `readonly` **siteUrls**: `string`[] + +The SharePoint site URL/URLs. +Must start with “https”. All URLs must start with same protocol. + +#### Example + +```ts +["https://yourdomain.sharepoint.com/sites/mysite"] +``` + +*** + +### tenantId + +> `readonly` **tenantId**: `string` + +The identifier of your Microsoft 365 tenant. + +#### Example + +```ts +"d1c035a6-1dcf-457d-97e3" +``` diff --git a/apidocs/namespaces/bedrock/interfaces/SharepointDataSourceProps.md b/apidocs/namespaces/bedrock/interfaces/SharepointDataSourceProps.md new file mode 100644 index 00000000..59a37d69 --- /dev/null +++ b/apidocs/namespaces/bedrock/interfaces/SharepointDataSourceProps.md @@ -0,0 +1,236 @@ +[**@cdklabs/generative-ai-cdk-constructs**](../../../README.md) • **Docs** + +*** + +[@cdklabs/generative-ai-cdk-constructs](../../../README.md) / [bedrock](../README.md) / SharePointDataSourceProps + +# Interface: SharePointDataSourceProps + +Interface to create a new standalone data source object + +## Extends + +- [`SharePointDataSourceAssociationProps`](SharePointDataSourceAssociationProps.md) + +## Properties + +### authSecret + +> `readonly` **authSecret**: `ISecret` + +The AWS Secrets Manager secret that stores your authentication credentials +for your Sharepoint instance URL. Secret must start with "AmazonBedrock-". 
+ +#### Inherited from + +[`SharePointDataSourceAssociationProps`](SharePointDataSourceAssociationProps.md).[`authSecret`](SharePointDataSourceAssociationProps.md#authsecret) + +*** + +### chunkingStrategy? + +> `readonly` `optional` **chunkingStrategy**: [`ChunkingStrategy`](../classes/ChunkingStrategy.md) + +The chunking strategy to use for splitting your documents or content. +The chunks are then converted to embeddings and written to the vector +index allowing for similarity search and retrieval of the content. + +#### Default + +```ts +ChunkingStrategy.DEFAULT +``` + +#### Inherited from + +[`SharePointDataSourceAssociationProps`](SharePointDataSourceAssociationProps.md).[`chunkingStrategy`](SharePointDataSourceAssociationProps.md#chunkingstrategy) + +*** + +### customTransformation? + +> `readonly` `optional` **customTransformation**: [`CustomTransformation`](../classes/CustomTransformation.md) + +The custom transformation strategy to use. + +#### Default + +```ts +- No custom transformation is used. +``` + +#### Inherited from + +[`SharePointDataSourceAssociationProps`](SharePointDataSourceAssociationProps.md).[`customTransformation`](SharePointDataSourceAssociationProps.md#customtransformation) + +*** + +### dataDeletionPolicy? + +> `readonly` `optional` **dataDeletionPolicy**: [`DataDeletionPolicy`](../enumerations/DataDeletionPolicy.md) + +The data deletion policy to apply to the data source. + +#### Default + +```ts +- Sets the data deletion policy to the default of the data source type. +``` + +#### Inherited from + +[`SharePointDataSourceAssociationProps`](SharePointDataSourceAssociationProps.md).[`dataDeletionPolicy`](SharePointDataSourceAssociationProps.md#datadeletionpolicy) + +*** + +### dataSourceName? + +> `readonly` `optional` **dataSourceName**: `string` + +The name of the data source. + +#### Default + +```ts +- A new name will be generated. 
+``` + +#### Inherited from + +[`SharePointDataSourceAssociationProps`](SharePointDataSourceAssociationProps.md).[`dataSourceName`](SharePointDataSourceAssociationProps.md#datasourcename) + +*** + +### description? + +> `readonly` `optional` **description**: `string` + +A description of the data source. + +#### Default + +```ts +- No description is provided. +``` + +#### Inherited from + +[`SharePointDataSourceAssociationProps`](SharePointDataSourceAssociationProps.md).[`description`](SharePointDataSourceAssociationProps.md#description) + +*** + +### domain + +> `readonly` **domain**: `string` + +The domain of your SharePoint instance or site URL/URLs. + +#### Example + +```ts +"yourdomain" +``` + +#### Inherited from + +[`SharePointDataSourceAssociationProps`](SharePointDataSourceAssociationProps.md).[`domain`](SharePointDataSourceAssociationProps.md#domain) + +*** + +### filters? + +> `readonly` `optional` **filters**: [`SharePointCrawlingFilters`](SharePointCrawlingFilters.md)[] + +The filters (regular expression patterns) for the crawling. +If there's a conflict, the exclude pattern takes precedence. + +#### Default + +```ts +None - all your content is crawled. +``` + +#### Inherited from + +[`SharePointDataSourceAssociationProps`](SharePointDataSourceAssociationProps.md).[`filters`](SharePointDataSourceAssociationProps.md#filters) + +*** + +### kmsKey? + +> `readonly` `optional` **kmsKey**: `IKey` + +The KMS key to use to encrypt the data source. + +#### Default + +```ts +- Service owned and managed key. +``` + +#### Inherited from + +[`SharePointDataSourceAssociationProps`](SharePointDataSourceAssociationProps.md).[`kmsKey`](SharePointDataSourceAssociationProps.md#kmskey) + +*** + +### knowledgeBase + +> `readonly` **knowledgeBase**: [`IKnowledgeBase`](IKnowledgeBase.md) + +The knowledge base to associate with the data source. + +*** + +### parsingStrategy? 
+ +> `readonly` `optional` **parsingStrategy**: [`ParsingStategy`](../classes/ParsingStategy.md) + +The parsing strategy to use. + +#### Default + +```ts +- No Parsing Stategy is used. +``` + +#### Inherited from + +[`SharePointDataSourceAssociationProps`](SharePointDataSourceAssociationProps.md).[`parsingStrategy`](SharePointDataSourceAssociationProps.md#parsingstrategy) + +*** + +### siteUrls + +> `readonly` **siteUrls**: `string`[] + +The SharePoint site URL/URLs. +Must start with “https”. All URLs must start with same protocol. + +#### Example + +```ts +["https://yourdomain.sharepoint.com/sites/mysite"] +``` + +#### Inherited from + +[`SharePointDataSourceAssociationProps`](SharePointDataSourceAssociationProps.md).[`siteUrls`](SharePointDataSourceAssociationProps.md#siteurls) + +*** + +### tenantId + +> `readonly` **tenantId**: `string` + +The identifier of your Microsoft 365 tenant. + +#### Example + +```ts +"d1c035a6-1dcf-457d-97e3" +``` + +#### Inherited from + +[`SharePointDataSourceAssociationProps`](SharePointDataSourceAssociationProps.md).[`tenantId`](SharePointDataSourceAssociationProps.md#tenantid) diff --git a/apidocs/namespaces/bedrock/interfaces/WebCrawlerDataSourceAssociationProps.md b/apidocs/namespaces/bedrock/interfaces/WebCrawlerDataSourceAssociationProps.md new file mode 100644 index 00000000..1d352954 --- /dev/null +++ b/apidocs/namespaces/bedrock/interfaces/WebCrawlerDataSourceAssociationProps.md @@ -0,0 +1,198 @@ +[**@cdklabs/generative-ai-cdk-constructs**](../../../README.md) • **Docs** + +*** + +[@cdklabs/generative-ai-cdk-constructs](../../../README.md) / [bedrock](../README.md) / WebCrawlerDataSourceAssociationProps + +# Interface: WebCrawlerDataSourceAssociationProps + +Interface to add a new data source to an existing KB. + +## Extends + +- [`DataSourceAssociationProps`](DataSourceAssociationProps.md) + +## Extended by + +- [`WebCrawlerDataSourceProps`](WebCrawlerDataSourceProps.md) + +## Properties + +### chunkingStrategy? 
+ +> `readonly` `optional` **chunkingStrategy**: [`ChunkingStrategy`](../classes/ChunkingStrategy.md) + +The chunking strategy to use for splitting your documents or content. +The chunks are then converted to embeddings and written to the vector +index allowing for similarity search and retrieval of the content. + +#### Default + +```ts +ChunkingStrategy.DEFAULT +``` + +#### Inherited from + +[`DataSourceAssociationProps`](DataSourceAssociationProps.md).[`chunkingStrategy`](DataSourceAssociationProps.md#chunkingstrategy) + +*** + +### crawlingRate? + +> `readonly` `optional` **crawlingRate**: `number` + +The max rate at which pages are crawled, up to 300 per minute per host. +Higher values will decrease sync time but increase the load on the host. + +#### Default + +```ts +300 +``` + +*** + +### crawlingScope? + +> `readonly` `optional` **crawlingScope**: [`CrawlingScope`](../enumerations/CrawlingScope.md) + +The scope of the crawling. + +#### Default + +```ts +- CrawlingScope.DEFAULT +``` + +*** + +### customTransformation? + +> `readonly` `optional` **customTransformation**: [`CustomTransformation`](../classes/CustomTransformation.md) + +The custom transformation strategy to use. + +#### Default + +```ts +- No custom transformation is used. +``` + +#### Inherited from + +[`DataSourceAssociationProps`](DataSourceAssociationProps.md).[`customTransformation`](DataSourceAssociationProps.md#customtransformation) + +*** + +### dataDeletionPolicy? + +> `readonly` `optional` **dataDeletionPolicy**: [`DataDeletionPolicy`](../enumerations/DataDeletionPolicy.md) + +The data deletion policy to apply to the data source. + +#### Default + +```ts +- Sets the data deletion policy to the default of the data source type. +``` + +#### Inherited from + +[`DataSourceAssociationProps`](DataSourceAssociationProps.md).[`dataDeletionPolicy`](DataSourceAssociationProps.md#datadeletionpolicy) + +*** + +### dataSourceName? 
+ +> `readonly` `optional` **dataSourceName**: `string` + +The name of the data source. + +#### Default + +```ts +- A new name will be generated. +``` + +#### Inherited from + +[`DataSourceAssociationProps`](DataSourceAssociationProps.md).[`dataSourceName`](DataSourceAssociationProps.md#datasourcename) + +*** + +### description? + +> `readonly` `optional` **description**: `string` + +A description of the data source. + +#### Default + +```ts +- No description is provided. +``` + +#### Inherited from + +[`DataSourceAssociationProps`](DataSourceAssociationProps.md).[`description`](DataSourceAssociationProps.md#description) + +*** + +### filters? + +> `readonly` `optional` **filters**: [`CrawlingFilters`](CrawlingFilters.md) + +The filters (regular expression patterns) for the crawling. +If there's a conflict, the exclude pattern takes precedence. + +#### Default + +```ts +None +``` + +*** + +### kmsKey? + +> `readonly` `optional` **kmsKey**: `IKey` + +The KMS key to use to encrypt the data source. + +#### Default + +```ts +- Service owned and managed key. +``` + +#### Inherited from + +[`DataSourceAssociationProps`](DataSourceAssociationProps.md).[`kmsKey`](DataSourceAssociationProps.md#kmskey) + +*** + +### parsingStrategy? + +> `readonly` `optional` **parsingStrategy**: [`ParsingStategy`](../classes/ParsingStategy.md) + +The parsing strategy to use. + +#### Default + +```ts +- No Parsing Stategy is used. +``` + +#### Inherited from + +[`DataSourceAssociationProps`](DataSourceAssociationProps.md).[`parsingStrategy`](DataSourceAssociationProps.md#parsingstrategy) + +*** + +### sourceUrls + +> `readonly` **sourceUrls**: `string`[] + +The source urls in the format `https://www.sitename.com`. +Maximum of 100 URLs. 
diff --git a/apidocs/namespaces/bedrock/interfaces/WebCrawlerDataSourceProps.md b/apidocs/namespaces/bedrock/interfaces/WebCrawlerDataSourceProps.md new file mode 100644 index 00000000..0226a1bf --- /dev/null +++ b/apidocs/namespaces/bedrock/interfaces/WebCrawlerDataSourceProps.md @@ -0,0 +1,218 @@ +[**@cdklabs/generative-ai-cdk-constructs**](../../../README.md) • **Docs** + +*** + +[@cdklabs/generative-ai-cdk-constructs](../../../README.md) / [bedrock](../README.md) / WebCrawlerDataSourceProps + +# Interface: WebCrawlerDataSourceProps + +Interface to create a new standalone data source object. + +## Extends + +- [`WebCrawlerDataSourceAssociationProps`](WebCrawlerDataSourceAssociationProps.md) + +## Properties + +### chunkingStrategy? + +> `readonly` `optional` **chunkingStrategy**: [`ChunkingStrategy`](../classes/ChunkingStrategy.md) + +The chunking strategy to use for splitting your documents or content. +The chunks are then converted to embeddings and written to the vector +index allowing for similarity search and retrieval of the content. + +#### Default + +```ts +ChunkingStrategy.DEFAULT +``` + +#### Inherited from + +[`WebCrawlerDataSourceAssociationProps`](WebCrawlerDataSourceAssociationProps.md).[`chunkingStrategy`](WebCrawlerDataSourceAssociationProps.md#chunkingstrategy) + +*** + +### crawlingRate? + +> `readonly` `optional` **crawlingRate**: `number` + +The max rate at which pages are crawled, up to 300 per minute per host. +Higher values will decrease sync time but increase the load on the host. + +#### Default + +```ts +300 +``` + +#### Inherited from + +[`WebCrawlerDataSourceAssociationProps`](WebCrawlerDataSourceAssociationProps.md).[`crawlingRate`](WebCrawlerDataSourceAssociationProps.md#crawlingrate) + +*** + +### crawlingScope? + +> `readonly` `optional` **crawlingScope**: [`CrawlingScope`](../enumerations/CrawlingScope.md) + +The scope of the crawling. 
+ +#### Default + +```ts +- CrawlingScope.DEFAULT +``` + +#### Inherited from + +[`WebCrawlerDataSourceAssociationProps`](WebCrawlerDataSourceAssociationProps.md).[`crawlingScope`](WebCrawlerDataSourceAssociationProps.md#crawlingscope) + +*** + +### customTransformation? + +> `readonly` `optional` **customTransformation**: [`CustomTransformation`](../classes/CustomTransformation.md) + +The custom transformation strategy to use. + +#### Default + +```ts +- No custom transformation is used. +``` + +#### Inherited from + +[`WebCrawlerDataSourceAssociationProps`](WebCrawlerDataSourceAssociationProps.md).[`customTransformation`](WebCrawlerDataSourceAssociationProps.md#customtransformation) + +*** + +### dataDeletionPolicy? + +> `readonly` `optional` **dataDeletionPolicy**: [`DataDeletionPolicy`](../enumerations/DataDeletionPolicy.md) + +The data deletion policy to apply to the data source. + +#### Default + +```ts +- Sets the data deletion policy to the default of the data source type. +``` + +#### Inherited from + +[`WebCrawlerDataSourceAssociationProps`](WebCrawlerDataSourceAssociationProps.md).[`dataDeletionPolicy`](WebCrawlerDataSourceAssociationProps.md#datadeletionpolicy) + +*** + +### dataSourceName? + +> `readonly` `optional` **dataSourceName**: `string` + +The name of the data source. + +#### Default + +```ts +- A new name will be generated. +``` + +#### Inherited from + +[`WebCrawlerDataSourceAssociationProps`](WebCrawlerDataSourceAssociationProps.md).[`dataSourceName`](WebCrawlerDataSourceAssociationProps.md#datasourcename) + +*** + +### description? + +> `readonly` `optional` **description**: `string` + +A description of the data source. + +#### Default + +```ts +- No description is provided. +``` + +#### Inherited from + +[`WebCrawlerDataSourceAssociationProps`](WebCrawlerDataSourceAssociationProps.md).[`description`](WebCrawlerDataSourceAssociationProps.md#description) + +*** + +### filters? 
+ +> `readonly` `optional` **filters**: [`CrawlingFilters`](CrawlingFilters.md) + +The filters (regular expression patterns) for the crawling. +If there's a conflict, the exclude pattern takes precedence. + +#### Default + +```ts +None +``` + +#### Inherited from + +[`WebCrawlerDataSourceAssociationProps`](WebCrawlerDataSourceAssociationProps.md).[`filters`](WebCrawlerDataSourceAssociationProps.md#filters) + +*** + +### kmsKey? + +> `readonly` `optional` **kmsKey**: `IKey` + +The KMS key to use to encrypt the data source. + +#### Default + +```ts +- Service owned and managed key. +``` + +#### Inherited from + +[`WebCrawlerDataSourceAssociationProps`](WebCrawlerDataSourceAssociationProps.md).[`kmsKey`](WebCrawlerDataSourceAssociationProps.md#kmskey) + +*** + +### knowledgeBase + +> `readonly` **knowledgeBase**: [`IKnowledgeBase`](IKnowledgeBase.md) + +The knowledge base to associate with the data source. + +*** + +### parsingStrategy? + +> `readonly` `optional` **parsingStrategy**: [`ParsingStategy`](../classes/ParsingStategy.md) + +The parsing strategy to use. + +#### Default + +```ts +- No Parsing Stategy is used. +``` + +#### Inherited from + +[`WebCrawlerDataSourceAssociationProps`](WebCrawlerDataSourceAssociationProps.md).[`parsingStrategy`](WebCrawlerDataSourceAssociationProps.md#parsingstrategy) + +*** + +### sourceUrls + +> `readonly` **sourceUrls**: `string`[] + +The source urls in the format `https://www.sitename.com`. +Maximum of 100 URLs. 
+ +#### Inherited from + +[`WebCrawlerDataSourceAssociationProps`](WebCrawlerDataSourceAssociationProps.md).[`sourceUrls`](WebCrawlerDataSourceAssociationProps.md#sourceurls) diff --git a/package.json b/package.json index e40801f8..27a9a904 100644 --- a/package.json +++ b/package.json @@ -20,11 +20,6 @@ "integ:aws-aoss-cw-dashboard:destroy": "npx projen integ:aws-aoss-cw-dashboard:destroy", "integ:aws-aoss-cw-dashboard:snapshot": "npx projen integ:aws-aoss-cw-dashboard:snapshot", "integ:aws-aoss-cw-dashboard:watch": "npx projen integ:aws-aoss-cw-dashboard:watch", - "integ:prompts:assert": "npx projen integ:prompts:assert", - "integ:prompts:deploy": "npx projen integ:prompts:deploy", - "integ:prompts:destroy": "npx projen integ:prompts:destroy", - "integ:prompts:snapshot": "npx projen integ:prompts:snapshot", - "integ:prompts:watch": "npx projen integ:prompts:watch", "integ:snapshot-all": "npx projen integ:snapshot-all", "package": "npx projen package", "package-all": "npx projen package-all", diff --git a/src/cdk-lib/bedrock/README.md b/src/cdk-lib/bedrock/README.md index f0253b30..9c70a4e1 100644 --- a/src/cdk-lib/bedrock/README.md +++ b/src/cdk-lib/bedrock/README.md @@ -66,9 +66,10 @@ new bedrock.S3DataSource(this, "DataSource", { bucket: docBucket, knowledgeBase: kb, dataSourceName: "books", - chunkingStrategy: bedrock.ChunkingStrategy.FIXED_SIZE, - maxTokens: 500, - overlapPercentage: 20, + chunkingStrategy: bedrock.ChunkingStrategy.fixedSize({ + maxTokens: 500, + overlapPercentage: 20, + }), }); ``` @@ -136,8 +137,6 @@ new bedrock.S3DataSource(this, "DataSource", { knowledgeBase: kb, dataSourceName: "books", chunkingStrategy: bedrock.ChunkingStrategy.FIXED_SIZE, - maxTokens: 500, - overlapPercentage: 20, }); ``` @@ -178,8 +177,6 @@ bedrock.S3DataSource(self, 'DataSource', knowledge_base=kb, data_source_name='books', chunking_strategy= bedrock.ChunkingStrategy.FIXED_SIZE, - max_tokens=500, - overlap_percentage=20 ) ``` @@ -211,8 +208,6 @@ new 
bedrock.S3DataSource(this, "DataSource", { knowledgeBase: kb, dataSourceName: "books", chunkingStrategy: bedrock.ChunkingStrategy.FIXED_SIZE, - maxTokens: 500, - overlapPercentage: 20, }); ``` @@ -248,8 +243,6 @@ bedrock.S3DataSource(self, 'DataSource', knowledge_base=kb, data_source_name='books', chunking_strategy= bedrock.ChunkingStrategy.FIXED_SIZE, - max_tokens=500, - overlap_percentage=20 ) ``` @@ -283,8 +276,6 @@ new bedrock.S3DataSource(this, "DataSource", { knowledgeBase: kb, dataSourceName: "books", chunkingStrategy: bedrock.ChunkingStrategy.FIXED_SIZE, - maxTokens: 500, - overlapPercentage: 20, }); ``` @@ -321,11 +312,222 @@ bedrock.S3DataSource(self, 'DataSource', knowledge_base=kb, data_source_name='books', chunking_strategy= bedrock.ChunkingStrategy.FIXED_SIZE, - max_tokens=500, - overlap_percentage=20 ) ``` +#### Knowledge Base - Data Sources + +Data sources are the various repositories or systems from which information is extracted and ingested into the +knowledge base. These sources provide the raw content that will be processed, indexed, and made available for +querying within the knowledge base system. Data sources can include various types of systems such as document +management systems, databases, file storage systems, and content management platforms. Supported data sources +include Amazon S3 buckets, Web Crawlers, SharePoint sites, Salesforce instances, and Confluence spaces. + +- **Amazon S3**. You can either create a new data source using the `bedrock.S3DataSource(..)` class, or using the + `kb.addS3DataSource(..)`. +- **Web Crawler**. You can either create a new data source using the `bedrock.WebCrawlerDataSource(..)` class, or using the + `kb.addWebCrawlerDataSource(..)`. +- **Confluence**. You can either create a new data source using the `bedrock.ConfluenceDataSource(..)` class, or using the + `kb.addConfluenceDataSource(..)`. +- **SharePoint**. 
You can either create a new data source using the `bedrock.SharepointDataSource(..)` class, or using the + `kb.addSharePointDataSource(..)`. +- **Salesforce**. You can either create a new data source using the `bedrock.SalesforceDataSource(..)` class, or using the + `kb.addSalesforceDataSource(..)`. + +Typescript + +```ts +const app = new cdk.App(); +const stack = new cdk.Stack(app, "aws-cdk-bedrock-data-sources-integ-test"); + +const kb = new KnowledgeBase(stack, "MyKnowledgeBase", { + name: "MyKnowledgeBase", + embeddingsModel: BedrockFoundationModel.COHERE_EMBED_MULTILINGUAL_V3, +}); + +const bucket = new Bucket(stack, "Bucket", {}); +const lambdaFunction = new Function(stack, "MyFunction", { + runtime: cdk.aws_lambda.Runtime.PYTHON_3_9, + handler: "index.handler", + code: cdk.aws_lambda.Code.fromInline('print("Hello, World!")'), +}); + +const secret = new Secret(stack, "Secret"); +const key = new Key(stack, "Key"); + +kb.addWebCrawlerDataSource({ + sourceUrls: ["https://docs.aws.amazon.com/"], + chunkingStrategy: ChunkingStrategy.HIERARCHICAL_COHERE, + customTransformation: CustomTransformation.lambda({ + lambdaFunction: lambdaFunction, + s3BucketUri: `s3://${bucket.bucketName}/chunk-processor/`, + }), +}); + +kb.addS3DataSource({ + bucket, + chunkingStrategy: ChunkingStrategy.SEMANTIC, + parsingStrategy: ParsingStategy.foundationModel({ + model: BedrockFoundationModel.ANTHROPIC_CLAUDE_SONNET_V1_0.asIModel(stack), + }), +}); + +kb.addConfluenceDataSource({ + dataSourceName: "TestDataSource", + authSecret: secret, + kmsKey: key, + confluenceUrl: "https://example.atlassian.net", + filters: [ + { + objectType: ConfluenceObjectType.ATTACHMENT, + includePatterns: [".*\\.pdf"], + excludePatterns: [".*private.*\\.pdf"], + }, + { + objectType: ConfluenceObjectType.PAGE, + includePatterns: [".*public.*\\.pdf"], + excludePatterns: [".*confidential.*\\.pdf"], + }, + ], +}); + +kb.addSalesforceDataSource({ + authSecret: secret, + endpoint: 
"https://your-instance.my.salesforce.com", + kmsKey: key, + filters: [ + { + objectType: SalesforceObjectType.ATTACHMENT, + includePatterns: [".*\\.pdf"], + excludePatterns: [".*private.*\\.pdf"], + }, + { + objectType: SalesforceObjectType.CONTRACT, + includePatterns: [".*public.*\\.pdf"], + excludePatterns: [".*confidential.*\\.pdf"], + }, + ], +}); + +kb.addSharePointDataSource({ + dataSourceName: "SharepointDataSource", + authSecret: secret, + kmsKey: key, + domain: "yourdomain", + siteUrls: ["https://yourdomain.sharepoint.com/sites/mysite"], + tenantId: "888d0b57-69f1-4fb8-957f-e1f0bedf64de", + filters: [ + { + objectType: SharepointObjectType.PAGE, + includePatterns: [".*\\.pdf"], + excludePatterns: [".*private.*\\.pdf"], + }, + { + objectType: SharepointObjectType.FILE, + includePatterns: [".*public.*\\.pdf"], + excludePatterns: [".*confidential.*\\.pdf"], + }, + ], +}); +``` + +#### Knowledge Base - Chunking Strategies + +- **Default Chunking**: Applies Fixed Chunking with the default chunk size of 300 tokens and 20% overlap. + + ```ts + ChunkingStrategy.DEFAULT; + ``` + +- **Fixed Size Chunking**: This method divides the data into fixed-size chunks, with each chunk + containing a predetermined number of tokens. This strategy is useful when the data is uniform + in size and structure. + Typescript + + ```ts + // Fixed Size Chunking with sane defaults. + ChunkingStrategy.FIXED_SIZE; + + // Fixed Size Chunking with custom values. + ChunkingStrategy.fixedSize({ maxTokens: 200, overlapPercentage: 25 }); + ``` + +- **Hierarchical Chunking**: This strategy organizes data into layers of chunks, with the first + layer containing large chunks and the second layer containing smaller chunks derived from the first. + It is ideal for data with inherent hierarchies or nested structures. + + ```ts + // Hierarchical Chunking with the default for Cohere Models. + ChunkingStrategy.HIERARCHICAL_COHERE; + + // Hierarchical Chunking with the default for Titan Models. 
+ ChunkingStrategy.HIERARCHICAL_TITAN; + + // Hierarchical Chunking with custom values. The maximum chunk size depends on the model. + // Amazon Titan Text Embeddings: 8192. Cohere Embed models: 512 + ChunkingStrategy.hierarchical({ + overlapTokens: 60, + maxParentTokenSize: 1500, + maxChildTokenSize: 300, + }); + ``` + +- **Semantic Chunking**: This method splits data into smaller documents based on groups of similar + content derived from the text using natural language processing. It helps preserve contextual + relationships and ensures accurate and contextually appropriate results. + + ```ts + // Semantic Chunking with sane defaults. + ChunkingStrategy.SEMANTIC; + + // Semantic Chunking with custom values. + ChunkingStrategy.semantic({ bufferSize: 0, breakpointPercentileThreshold: 95, maxTokens: 300 }); + ``` + +- **No Chunking**: This strategy treats each file as one chunk. If you choose this option, + you may want to pre-process your documents by splitting them into separate files. + + ```ts + ChunkingStrategy.NONE; + ``` + +#### Knowledge Base - Parsing Strategy + +A parsing strategy in Amazon Bedrock is a configuration that determines how the service +processes and interprets the contents of a document. It involves converting the document's +contents into text and splitting it into smaller chunks for analysis. Amazon Bedrock offers +two parsing strategies: + +- **Default Parsing Strategy**: This strategy converts the document's contents into text + and splits it into chunks using a predefined approach. It is suitable for most use cases + but may not be optimal for specific document types or requirements. + +- **Foundation Model Parsing Strategy**: This strategy uses a foundation model to describe + the contents of the document. It is particularly useful for improved processing of PDF files + with tables and images. To use this strategy, set the `parsingStrategy` in a data source as below. 
+ + ```ts + bedrock.ParsingStategy.foundationModel({ + model: BedrockFoundationModel.ANTHROPIC_CLAUDE_SONNET_V1_0.asIModel(stack), + }); + ``` + +#### Knowledge Base - Custom Transformation + +Custom Transformation in Amazon Bedrock is a feature that allows you to create and apply +custom processing steps to documents moving through a data source ingestion pipeline. + +Custom Transformation uses AWS Lambda functions to process documents, enabling you to +perform custom operations such as data extraction, normalization, or enrichment. To +create a custom transformation, set the `customTransformation` in a data source as below. + +```ts +CustomTransformation.lambda({ + lambdaFunction: lambdaFunction, + s3BucketUri: `s3://${bucket.bucketName}/chunk-processor/`, +}), +``` + ## Agents Enable generative AI applications to execute multistep tasks across company systems and data sources. @@ -723,11 +925,7 @@ Example of `Prompt`: ```ts const cmk = new kms.Key(this, "cmk", {}); -const claudeModel = cdk_bedrock.FoundationModel.fromFoundationModelId( - this, - "model1", - cdk_bedrock.FoundationModelIdentifier.ANTHROPIC_CLAUDE_3_SONNET_20240229_V1_0 -); +const claudeModel = BedrockFoundationModel.ANTHROPIC_CLAUDE_SONNET_V1_0.asIModel(this); const variant1 = PromptVariant.text({ variantName: "variant1", diff --git a/src/cdk-lib/bedrock/data-sources/base-data-source.ts b/src/cdk-lib/bedrock/data-sources/base-data-source.ts new file mode 100644 index 00000000..38861f6d --- /dev/null +++ b/src/cdk-lib/bedrock/data-sources/base-data-source.ts @@ -0,0 +1,248 @@ +/** + * Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. + * + * Licensed under the Apache License, Version 2.0 (the "License"). You may not use this file except in compliance + * with the License. A copy of the License is located at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * or in the 'license' file accompanying this file. 
This file is distributed on an 'AS IS' BASIS, WITHOUT WARRANTIES + * OR CONDITIONS OF ANY KIND, express or implied. See the License for the specific language governing permissions + * and limitations under the License. + */ + +import { IResource, Resource } from 'aws-cdk-lib'; +import { CfnDataSource, CfnDataSourceProps } from 'aws-cdk-lib/aws-bedrock'; +import { PolicyStatement } from 'aws-cdk-lib/aws-iam'; +import * as kms from 'aws-cdk-lib/aws-kms'; +import { Construct } from 'constructs'; + +import { IKnowledgeBase } from './../knowledge-base'; +import { ChunkingStrategy } from './chunking'; +import { CustomTransformation } from './custom-transformation'; +import { ParsingStategy } from './parsing'; +// import { PolicyStatement } from 'aws-cdk-lib/aws-iam'; + + +/** + * Specifies the policy for handling data when a data source resource is deleted. + * This policy affects the vector embeddings created from the data source. + */ +export enum DataDeletionPolicy { + /** + * Deletes all vector embeddings derived from the data source upon deletion + * of a data source resource. + */ + DELETE = 'DELETE', + + /** + * Retains all vector embeddings derived from the data source even after + * deletion of a data source resource. + */ + RETAIN = 'RETAIN' +} + + +/** + * Represents the types of data sources that can be associated to an Knowledge Base. + */ +export enum DataSourceType { + /** + * Amazon S3 Bucket data source. + */ + S3 = 'S3', + + /** + * Confluence Cloud Instance data source. + */ + CONFLUENCE = 'CONFLUENCE', + + /** + * Salesforce instance data source. + */ + SALESFORCE = 'SALESFORCE', + + /** + * Microsoft SharePoint instance data source. + */ + SHAREPOINT = 'SHAREPOINT', + + /** + * Web Crawler data source. + * Extracts content from authorized public web pages using a crawler. + */ + WEB_CRAWLER = 'WEB' +} + + +/** + * Specifies interface for resources created with CDK or imported into CDK. 
+ */ +export interface IDataSource extends IResource { + /** + * The unique identifier of the data source. + * @example 'JHUEVXUZMU' + */ + readonly dataSourceId: string; +} + +/** + * Specifies the base class for all data source resources (imported and new). + */ +export abstract class DataSourceBase extends Resource implements IDataSource { + /** + * The unique identifier of the data source. + * @example 'JHUEVXUZMU' + */ + public abstract readonly dataSourceId: string; + + // Common methods for imported and new data sources go here +} + + +/** + * Properties common for creating any of the different data source types. + */ +export interface DataSourceAssociationProps { + /** + * The name of the data source. + * + * @default - A new name will be generated. + */ + readonly dataSourceName?: string; + + /** + * A description of the data source. + * + * @default - No description is provided. + */ + readonly description?: string; + + /** + * The KMS key to use to encrypt the data source. + * + * @default - Service owned and managed key. + */ + readonly kmsKey?: kms.IKey; + + /** + * The data deletion policy to apply to the data source. + * + * @default - Sets the data deletion policy to the default of the data source type. + */ + readonly dataDeletionPolicy?: DataDeletionPolicy; + + /** + * The chunking strategy to use for splitting your documents or content. + * The chunks are then converted to embeddings and written to the vector + * index allowing for similarity search and retrieval of the content. + * + * @default ChunkingStrategy.DEFAULT + */ + readonly chunkingStrategy?: ChunkingStrategy; + + /** + * The parsing strategy to use. + * + * @default - No parsing strategy is used. + */ + readonly parsingStrategy?: ParsingStategy; + + /** + * The custom transformation strategy to use. + * + * @default - No custom transformation is used. 
+ */ + readonly customTransformation?: CustomTransformation; +} + +/** + * Specifies the base class for all NEW data source resources of ANY type. + */ +export abstract class DataSourceNew extends DataSourceBase { + /** + * The unique identifier of the data source. + * @example 'JHUEVXUZMU' + */ + public abstract readonly dataSourceId: string; + /** + * The type of data source. + */ + public abstract readonly dataSourceType: DataSourceType; + /** + * The name of the data source. + */ + public abstract readonly dataSourceName: string; + /** + * The knowledge base associated with the data source. + */ + public abstract readonly knowledgeBase: IKnowledgeBase; + /** + * The KMS key to use to encrypt the data source. + */ + public abstract readonly kmsKey?: kms.IKey; + + // ------------------------------------------------------ + // Common methods for ALL NEW data sources + // ------------------------------------------------------ + + /** + * Adds appropriate permissions to the KB execution role needed by the data source. + */ + public handleCommonPermissions(props: DataSourceAssociationProps) { + let statementsToAdd: PolicyStatement[] = []; + // Parsing strategy requires access to the parsing FM, so be sure to add permissions + if (props.parsingStrategy) { + statementsToAdd.push(...props.parsingStrategy.generatePolicyStatements()); + } + // Custom transformation requires invoke permissions for the Lambda + if (props.customTransformation) { + statementsToAdd.push(...props.customTransformation.generatePolicyStatements(this)); + } + // Add the permission statements to the KB execution role + statementsToAdd.forEach((statement) => { + this.knowledgeBase.role.addToPrincipalPolicy(statement); + }); + } + + /** + * Formats the data source configuration properties for CloudFormation. 
+ */ + public formatAsCfnProps( + props: DataSourceAssociationProps, + dataSourceConfiguration: CfnDataSource.DataSourceConfigurationProperty, + ): CfnDataSourceProps { + return { + dataDeletionPolicy: props.dataDeletionPolicy, + dataSourceConfiguration: dataSourceConfiguration, + description: props.description, + knowledgeBaseId: this.knowledgeBase.knowledgeBaseId, + name: this.dataSourceName, + serverSideEncryptionConfiguration: props.kmsKey ? { + kmsKeyArn: props.kmsKey.keyArn, + } : undefined, + vectorIngestionConfiguration: (props.chunkingStrategy || props.parsingStrategy || props.customTransformation) ? { + chunkingConfiguration: props.chunkingStrategy?.configuration, + parsingConfiguration: props.parsingStrategy?.configuration, + customTransformationConfiguration: props.customTransformation?.configuration, + } : undefined, + + }; + } + +} + + +export class DataSource extends DataSourceBase { + + public static fromDataSourceId(scope: Construct, id: string, dataSourceId: string): IDataSource { + return new DataSource(scope, id, dataSourceId); + } + + public readonly dataSourceId: string; + + private constructor(scope: Construct, id: string, dataSourceId: string) { + super(scope, id); + this.dataSourceId = dataSourceId; + } +} diff --git a/src/cdk-lib/bedrock/data-sources/chunking.ts b/src/cdk-lib/bedrock/data-sources/chunking.ts new file mode 100644 index 00000000..4f63f9d0 --- /dev/null +++ b/src/cdk-lib/bedrock/data-sources/chunking.ts @@ -0,0 +1,183 @@ +/** + * Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. + * + * Licensed under the Apache License, Version 2.0 (the "License"). You may not use this file except in compliance + * with the License. A copy of the License is located at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * or in the 'license' file accompanying this file. This file is distributed on an 'AS IS' BASIS, WITHOUT WARRANTIES + * OR CONDITIONS OF ANY KIND, express or implied. 
See the License for the specific language governing permissions + * and limitations under the License. + */ +import { CfnDataSource } from 'aws-cdk-lib/aws-bedrock'; + +/** + * Knowledge base can split your source data into chunks. A chunk refers to an + * excerpt from a data source that is returned when the knowledge base that it + * belongs to is queried. You have the following options for chunking your + * data. If you opt for NONE, then you may want to pre-process your files by + * splitting them up such that each file corresponds to a chunk. + */ +enum ChunkingStrategyType { + /** + * Amazon Bedrock splits your source data into chunks of the approximate size + * that you set in the `fixedSizeChunkingConfiguration`. + */ + FIXED_SIZE = 'FIXED_SIZE', + /** + * Splits documents into layers of chunks where the first layer contains large + * chunks, and the second layer contains smaller chunks derived from the first + * layer. You set the maximum parent chunk token size and the maximum child + * chunk token size. You also set the absolute number of overlap tokens between + * consecutive parent chunks and consecutive child chunks. + */ + HIERARCHICAL = 'HIERARCHICAL', + /** + * Splits documents into semantically similar text chunks or groups of + * sentences by using a foundation model. Note that there are additional + * costs to using semantic chunking due to its use of a foundation model. + */ + SEMANTIC = 'SEMANTIC', + /** + * Amazon Bedrock treats each file as one chunk. If you choose this option, + * you may want to pre-process your documents by splitting them into separate + * files. + */ + NONE = 'NONE', +} + +export interface HierarchicalChunkingProps { + /** + * The overlap tokens between adjacent chunks. + */ + readonly overlapTokens: number; + /** + * Maximum number of tokens that a parent chunk can contain. + * Keep in mind the maximum chunk size depends on the embedding model chosen. 
+ */ + readonly maxParentTokenSize: number; + /** + * Maximum number of tokens that a child chunk can contain. + * Keep in mind the maximum chunk size depends on the embedding model chosen. + */ + readonly maxChildTokenSize: number; +} + +export abstract class ChunkingStrategy { + // ------------------------------------------------------ + // Static Constants for Easy Customization + // ------------------------------------------------------ + /** + * Fixed Sized Chunking with the default chunk size of 300 tokens and 20% overlap. + */ + public static readonly DEFAULT = ChunkingStrategy.fixedSize( + { maxTokens: 300, overlapPercentage: 20 }, + ); + /** + * Fixed Sized Chunking with the default chunk size of 300 tokens and 20% overlap. + * You can adjust these values based on your specific requirements using the + * `ChunkingStrategy.fixedSize(params)` method. + */ + public static readonly FIXED_SIZE = ChunkingStrategy.fixedSize( + { maxTokens: 300, overlapPercentage: 20 }, + ); + /** + * Hierarchical Chunking with the default for Cohere Models. + * - Overlap tokens: 30 + * - Max parent token size: 500 + * - Max child token size: 100 + */ + public static readonly HIERARCHICAL_COHERE = ChunkingStrategy.hierarchical( + { overlapTokens: 30, maxParentTokenSize: 500, maxChildTokenSize: 100 }, + ); + + /** + * Hierarchical Chunking with the default for Titan Models. + * - Overlap tokens: 60 + * - Max parent token size: 1500 + * - Max child token size: 300 + */ + public static readonly HIERARCHICAL_TITAN = ChunkingStrategy.hierarchical( + { overlapTokens: 60, maxParentTokenSize: 1500, maxChildTokenSize: 300 }, + ); + /** + * Semantic Chunking with the default of bufferSize: 0, + * breakpointPercentileThreshold: 95, and maxTokens: 300. + * You can adjust these values based on your specific requirements using the + * `ChunkingStrategy.semantic(params)` method. 
+ */ + public static readonly SEMANTIC = ChunkingStrategy.semantic( + { bufferSize: 0, breakpointPercentileThreshold: 95, maxTokens: 300 }, + ); + /** + * Amazon Bedrock treats each file as one chunk. Suitable for documents that + * are already pre-processed or text split. + */ + public static readonly NONE = ChunkingStrategy.noChunking(); + + // ------------------------------------------------------ + // Static Methods for Customization + // ------------------------------------------------------ + /** Method for customizing a fixed sized chunking strategy. */ + public static fixedSize(props: CfnDataSource.FixedSizeChunkingConfigurationProperty): ChunkingStrategy { + return { + configuration: { + chunkingStrategy: ChunkingStrategyType.FIXED_SIZE, + fixedSizeChunkingConfiguration: props, + }, + }; + } + + /** + * Method for customizing a hierarchical chunking strategy. + * For custom chunking, the maximum token chunk size depends on the model. + * - Amazon Titan Text Embeddings: 8192 + * - Cohere Embed models: 512 + */ + public static hierarchical(props: HierarchicalChunkingProps): ChunkingStrategy { + return { + configuration: { + chunkingStrategy: ChunkingStrategyType.HIERARCHICAL, + hierarchicalChunkingConfiguration: { + overlapTokens: props.overlapTokens, + levelConfigurations: [ + { maxTokens: props.maxParentTokenSize }, + { maxTokens: props.maxChildTokenSize }, + ], + }, + }, + }; + } + + /** + * Method for customizing a semantic chunking strategy. + * For custom chunking, the maximum token chunk size depends on the model. + * - Amazon Titan Text Embeddings: 8192 + * - Cohere Embed models: 512 + */ + public static semantic(props: CfnDataSource.SemanticChunkingConfigurationProperty): ChunkingStrategy { + return { + configuration: { + chunkingStrategy: ChunkingStrategyType.SEMANTIC, + semanticChunkingConfiguration: props, + }, + }; + } + + /** Method for defining a no chunking strategy. 
*/ + private static noChunking(): ChunkingStrategy { + return { + configuration: { + chunkingStrategy: ChunkingStrategyType.NONE, + }, + }; + } + // ------------------------------------------------------ + // Properties + // ------------------------------------------------------ + /** The CloudFormation property representation of this configuration */ + public abstract configuration: CfnDataSource.ChunkingConfigurationProperty; + + private constructor() { } +} \ No newline at end of file diff --git a/src/cdk-lib/bedrock/data-sources/confluence-data-source.ts b/src/cdk-lib/bedrock/data-sources/confluence-data-source.ts new file mode 100644 index 00000000..a01a7d83 --- /dev/null +++ b/src/cdk-lib/bedrock/data-sources/confluence-data-source.ts @@ -0,0 +1,240 @@ +/** + * Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. + * + * Licensed under the Apache License, Version 2.0 (the "License"). You may not use this file except in compliance + * with the License. A copy of the License is located at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * or in the 'license' file accompanying this file. This file is distributed on an 'AS IS' BASIS, WITHOUT WARRANTIES + * OR CONDITIONS OF ANY KIND, express or implied. See the License for the specific language governing permissions + * and limitations under the License. + */ +import { CfnDataSource } from 'aws-cdk-lib/aws-bedrock'; +import { IKey } from 'aws-cdk-lib/aws-kms'; +import { ISecret } from 'aws-cdk-lib/aws-secretsmanager'; +import { Construct } from 'constructs'; + +import { IKnowledgeBase } from './../knowledge-base'; +import { DataSourceAssociationProps, DataSourceNew, DataSourceType } from './base-data-source'; +import { generatePhysicalNameV2 } from '../../../common/helpers/utils'; + +/** + * The different authentication types available to connect to your Confluence instance. 
+ * @see https://docs.aws.amazon.com/bedrock/latest/userguide/confluence-data-source-connector.html#configuration-confluence-connector + */ +export enum ConfluenceDataSourceAuthType { + /** + * Your secret authentication credentials in AWS Secrets Manager should include: + * - `confluenceAppKey` + * - `confluenceAppSecret` + * - `confluenceAccessToken` + * - `confluenceRefreshToken` + */ + OAUTH2_CLIENT_CREDENTIALS = 'OAUTH2_CLIENT_CREDENTIALS', + /** + * Your secret authentication credentials in AWS Secrets Manager should include: + * - `username` (email of admin account) + * - `password` (API token) + */ + BASIC = 'BASIC', +} + +/** + * Represents the different types of content objects in Confluence that can be + * crawled by the data source. + */ +export enum ConfluenceObjectType { + SPACE = 'Space', + PAGE = 'Page', + BLOG = 'Blog', + COMMENT = 'Comment', + ATTACHMENT = 'Attachment', +} + +/** + * Defines filters for crawling Confluence content. + * These filters allow you to include or exclude specific content based on object types and patterns. + * + * - For Spaces: Use the unique space key + * - For Pages: Use the main page title + * - For Blogs: Use the main blog title + * - For Comments: Use "Re: Page/Blog Title" + * - For Attachments: Use the filename with extension + * @remarks + * - You can specify inclusion and exclusion patterns using regular expressions. + * - If both inclusion and exclusion patterns match a document, the exclusion takes precedence. + * + * @example + * { + * objectType: ConfluenceObjectType.ATTACHMENT, + * excludePatterns: [".*private.*\\.pdf"] + * } + */ +export interface ConfluenceCrawlingFilters { + /** + * The type of Confluence object to apply the filters to. + */ + readonly objectType: ConfluenceObjectType; + + /** + * Regular expression patterns to include content. + * If specified, only content matching these patterns will be crawled. 
+ */ + readonly includePatterns?: string[]; + + /** + * Regular expression patterns to exclude content. + * Content matching these patterns will not be crawled, even if it matches an include pattern. + */ + readonly excludePatterns?: string[]; +} + + +/** + * Interface to add a new data source to an existing KB. + */ +export interface ConfluenceDataSourceAssociationProps extends DataSourceAssociationProps { + /** + * The Confluence host URL or instance URL. + * @example https://example.atlassian.net + */ + readonly confluenceUrl: string; + /** + * The AWS Secrets Manager secret that stores your authentication credentials + * for your Confluence instance URL. Secret must start with "AmazonBedrock-". + */ + readonly authSecret: ISecret; + /** + * The supported authentication method to connect to the data source. + * @default ConfluenceDataSourceAuthType.OAUTH2_CLIENT_CREDENTIALS + */ + readonly authType?: ConfluenceDataSourceAuthType; + /** + * The filters (regular expression patterns) for the crawling. + * If there's a conflict, the exclude pattern takes precedence. + * @default None - all your content is crawled. + */ + readonly filters?: ConfluenceCrawlingFilters[]; + +} + +/** + * Interface to create a new standalone data source object. + */ +export interface ConfluenceDataSourceProps extends ConfluenceDataSourceAssociationProps { + /** + * The knowledge base to associate with the data source. + */ + readonly knowledgeBase: IKnowledgeBase; +} + + +/** + * Sets up a Confluence Data Source to be added to a knowledge base. + * @see https://docs.aws.amazon.com/bedrock/latest/userguide/confluence-data-source-connector.html + */ +export class ConfluenceDataSource extends DataSourceNew { + // ------------------------------------------------------ + // Common attributes for all new data sources + // ------------------------------------------------------ + /** + * The unique identifier of the data source. 
+ * @example 'JHUEVXUZMU' + */ + public readonly dataSourceId: string; + /** + * The type of data source. + */ + public readonly dataSourceType: DataSourceType; + /** + * The name of the data source. + */ + public readonly dataSourceName: string; + /** + * The knowledge base associated with the data source. + */ + public readonly knowledgeBase: IKnowledgeBase; + /** + * The KMS key to use to encrypt the data source. + */ + public readonly kmsKey?: IKey; + // ------------------------------------------------------ + // Unique to this class + // ------------------------------------------------------ + /** + * The Confluence host URL or instance URL. + */ + public readonly confluenceUrl: string; + /** + * The AWS Secrets Manager secret that stores your authentication credentials. + */ + public readonly authSecret: ISecret; + // ------------------------------------------------------ + // Internal Only + // ------------------------------------------------------ + /** + * The Data Source cfn resource. + */ + private readonly __resource: CfnDataSource; + + + constructor(scope: Construct, id: string, props: ConfluenceDataSourceProps) { + super(scope, id); + // Assign common attributes + this.knowledgeBase = props.knowledgeBase; + this.dataSourceType = DataSourceType.CONFLUENCE; + this.dataSourceName = props.dataSourceName ?? generatePhysicalNameV2(this, 'confluence-ds', { maxLength: 40, lower: true, separator: '-' }); + this.kmsKey = props.kmsKey; + // Assign unique attributes + this.confluenceUrl = props.confluenceUrl; + this.authSecret = props.authSecret; + + // ------------------------------------------------------ + // Manage permissions for the data source + // ------------------------------------------------------ + this.handleCommonPermissions(props); + this.authSecret.grantRead(this.knowledgeBase.role); + + // Grant write permissions to the knowledge base role for updating the secret. + // This is necessary when using OAuth 2.0 authentication with a refresh token. 
+ if ((props.authType ?? ConfluenceDataSourceAuthType.OAUTH2_CLIENT_CREDENTIALS) === ConfluenceDataSourceAuthType.OAUTH2_CLIENT_CREDENTIALS) { + this.authSecret.grantWrite(this.knowledgeBase.role); + } + // ------------------------------------------------------ + // L1 Instantiation + // ------------------------------------------------------ + this.__resource = new CfnDataSource(this, 'DataSource', { + ...this.formatAsCfnProps( + props, + { + type: this.dataSourceType, + confluenceConfiguration: { + sourceConfiguration: { + authType: props.authType ?? ConfluenceDataSourceAuthType.OAUTH2_CLIENT_CREDENTIALS, + credentialsSecretArn: this.authSecret.secretArn, + hostUrl: this.confluenceUrl, + hostType: 'SAAS', + }, + crawlerConfiguration: + (props.filters) ? ({ + filterConfiguration: { + type: 'PATTERN', + patternObjectFilter: { + filters: props.filters?.map(item => ({ + objectType: item.objectType, + inclusionFilters: item.includePatterns, + exclusionFilters: item.excludePatterns, + })), + }, + }, + }) : undefined, + }, + }, + ), + }); + + this.dataSourceId = this.__resource.attrDataSourceId; + + } +} \ No newline at end of file diff --git a/src/cdk-lib/bedrock/data-sources/custom-transformation.ts b/src/cdk-lib/bedrock/data-sources/custom-transformation.ts new file mode 100644 index 00000000..96c15440 --- /dev/null +++ b/src/cdk-lib/bedrock/data-sources/custom-transformation.ts @@ -0,0 +1,117 @@ +/** + * Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. + * + * Licensed under the Apache License, Version 2.0 (the "License"). You may not use this file except in compliance + * with the License. A copy of the License is located at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * or in the 'license' file accompanying this file. This file is distributed on an 'AS IS' BASIS, WITHOUT WARRANTIES + * OR CONDITIONS OF ANY KIND, express or implied. See the License for the specific language governing permissions + * and limitations under the License. 
+ */ + +import { Stack } from 'aws-cdk-lib'; +import { CfnDataSource } from 'aws-cdk-lib/aws-bedrock'; +import { PolicyStatement } from 'aws-cdk-lib/aws-iam'; +import { IFunction } from 'aws-cdk-lib/aws-lambda'; +import { Construct } from 'constructs'; + +/** + * Defines the step in the ingestion process where the custom transformation is applied. + */ +export enum TransformationStep { + /** + * Processes documents after they have been converted into chunks. + * This allows for custom chunk-level metadata addition or custom post-chunking logic. + */ + POST_CHUNKING = 'POST_CHUNKING', +} + +/** + * Properties for configuring a Lambda-based custom transformation. + */ +export interface LambdaCustomTransformationProps { + /** + * The Lambda function to use for custom document processing. + */ + readonly lambdaFunction: IFunction; + + /** + * An S3 bucket URL/path to store input documents for Lambda processing + * and to store the output of the processed documents. + * @example "s3://my-bucket/chunk-processor/" + */ + readonly s3BucketUri: string; + + // Commented out because only one step (POST_CHUNKING) is supported at the time this code is written. + // /** + // * When in the ingestion process to apply the transformation step. + // * @default TransformationStep.POST_CHUNKING + // */ + // readonly stepToApply?: TransformationStep; +} + +/** + * Represents a custom transformation configuration for a data source ingestion. + * @see https://docs.aws.amazon.com/bedrock/latest/userguide/kb-chunking-parsing.html#kb-custom-transformation + */ +export abstract class CustomTransformation { + + // ------------------------------------------------------ + // Lambda Transformation Strategy + // ------------------------------------------------------ + /** + * This feature allows you to use a Lambda function to inject your own logic + * into the knowledge base ingestion process.
+ * @see https://github.com/aws-samples/amazon-bedrock-samples/blob/main/knowledge-bases/features-examples/02-optimizing-accuracy-retrieved-results/advanced_chunking_options.ipynb + */ + public static lambda(props: LambdaCustomTransformationProps): CustomTransformation { + + class LambdaCustomTransformation extends CustomTransformation { + public readonly configuration = { + intermediateStorage: { + s3Location: { + uri: props.s3BucketUri, + }, + }, + transformations: [ + { + stepToApply: TransformationStep.POST_CHUNKING, + // To uncomment when more steps are available + // stepToApply: props.stepToApply ?? TransformationStep.POST_CHUNKING, + transformationFunction: { + transformationLambdaConfiguration: { + lambdaArn: props.lambdaFunction.functionArn, + }, + }, + }, + ], + }; + public generatePolicyStatements(scope: Construct): PolicyStatement[] { + return [ + new PolicyStatement({ + actions: ['lambda:InvokeFunction'], + resources: [`${props.lambdaFunction.functionArn}:*`], + conditions: { + StringEquals: { + 'aws:ResourceAccount': Stack.of(scope).account, + }, + }, + }), + ]; + } + } + return new LambdaCustomTransformation(); + } + // ------------------------------------------------------ + // Properties + // ------------------------------------------------------ + /** + * The CloudFormation property representation of this custom transformation configuration. + */ + public abstract configuration: CfnDataSource.CustomTransformationConfigurationProperty; + + /** + * Returns the IAM policy statements required to use this transformation. + * The Lambda variant allows `lambda:InvokeFunction` on `<functionArn>:*`, conditioned on + * `aws:ResourceAccount` being equal to the account of the stack that contains `scope`. + */ + public abstract generatePolicyStatements(scope: Construct): PolicyStatement[]; + +} \ No newline at end of file diff --git a/src/cdk-lib/bedrock/data-sources/default-parsing-prompt.ts b/src/cdk-lib/bedrock/data-sources/default-parsing-prompt.ts new file mode 100644 index 00000000..601a6757 --- /dev/null +++ b/src/cdk-lib/bedrock/data-sources/default-parsing-prompt.ts @@ -0,0 +1,66 @@ +/** + * Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved.
+ * + * Licensed under the Apache License, Version 2.0 (the "License"). You may not use this file except in compliance + * with the License. A copy of the License is located at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * or in the 'license' file accompanying this file. This file is distributed on an 'AS IS' BASIS, WITHOUT WARRANTIES + * OR CONDITIONS OF ANY KIND, express or implied. See the License for the specific language governing permissions + * and limitations under the License. + */ +/** + * Default parsing prompt text. `ParsingStategy.foundationModel` (parsing.ts) sends it as + * `parsingPromptText` when no `parsingPrompt` is provided. + * NOTE(review): presumably mirrors the AWS Console default prompt verbatim; confirm before editing its wording. + */ +export const DEFAULT_PARSING_PROMPT = `Transcribe the text content from an image page and output in Markdown syntax (not code blocks). Follow these steps: + +1. Examine the provided page carefully. + +2. Identify all elements present in the page, including headers, body text, footnotes, tables, visulizations, captions, and page numbers, etc. + +3. Use markdown syntax to format your output: + - Headings: # for main, ## for sections, ### for subsections, etc. + - Lists: * or - for bulleted, 1. 2. 3. for numbered + - Do not repeat yourself + +4. If the element is a visulization + - Provide a detailed description in natural language + - Do not transcribe text in the visualization after providing the description + +5. If the element is a table + - Create a markdown table, ensuring every row has the same number of columns + - Maintain cell alignment as closely as possible + - Do not split a table into multiple tables + - If a merged cell spans multiple rows or columns, place the text in the top-left cell and output ' ' for other + - Use | for column separators, |-|-| for header row separators + - If a cell has multiple items, list them in separate rows + - If the table contains sub-headers, separate the sub-headers from the headers in another row + +6. If the element is a paragraph + - Transcribe each text element precisely as it appears + +7.
If the element is a header, footer, footnote, page number + - Transcribe each text element precisely as it appears + +Output Example: + +A bar chart showing annual sales figures, with the y-axis labeled "Sales ($Million)" and the x-axis labeled "Year". The chart has bars for 2018 ($12M), 2019 ($18M), 2020 ($8M), and 2021 ($22M). +Figure 3: This chart shows annual sales in millions. The year 2020 was significantly down due to the COVID-19 pandemic. + +# Annual Report + +## Financial Highlights + +* Revenue: $40M +* Profit: $12M +* EPS: $1.25 + + +| | Year Ended December 31, | | +| | 2021 | 2022 | +|-|-|-| +| Cash provided by (used in): | | | +| Operating activities | $ 46,327 | $ 46,752 | +| Investing activities | (58,154) | (37,601) | +| Financing activities | 6,291 | 9,718 | + +Here is the image. +`; diff --git a/src/cdk-lib/bedrock/data-sources/parsing.ts b/src/cdk-lib/bedrock/data-sources/parsing.ts new file mode 100644 index 00000000..1599382e --- /dev/null +++ b/src/cdk-lib/bedrock/data-sources/parsing.ts @@ -0,0 +1,96 @@ +/** + * Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. + * + * Licensed under the Apache License, Version 2.0 (the "License"). You may not use this file except in compliance + * with the License. A copy of the License is located at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * or in the 'license' file accompanying this file. This file is distributed on an 'AS IS' BASIS, WITHOUT WARRANTIES + * OR CONDITIONS OF ANY KIND, express or implied. See the License for the specific language governing permissions + * and limitations under the License. + */ + +import { CfnDataSource, IModel } from 'aws-cdk-lib/aws-bedrock'; +import { PolicyStatement } from 'aws-cdk-lib/aws-iam'; +import { DEFAULT_PARSING_PROMPT } from './default-parsing-prompt'; + +/** + * Enum representing the types of parsing strategies available for Amazon Bedrock Knowledge Bases.
+ */ +enum ParsingStategyType { + /** + * Uses a Bedrock Foundation Model for advanced parsing of non-textual information from documents. + */ + FOUNDATION_MODEL = 'BEDROCK_FOUNDATION_MODEL' +} + +/** + * Properties for configuring a Foundation Model parsing strategy. + */ +export interface FoundationModelParsingStategyProps { + /** + * The Foundation Model to use for parsing non-textual information. + * Currently supported models are Claude 3 Sonnet and Claude 3 Haiku. + */ + readonly parsingModel: IModel; + + /** + * Custom prompt to instruct the parser on how to interpret the document. + * + * @default - DEFAULT_PARSING_PROMPT, the default instruction prompt as provided in the AWS Console. + */ + readonly parsingPrompt?: string; + +} + +/** + * Represents an advanced parsing strategy configuration for Knowledge Base ingestion. + * @see https://docs.aws.amazon.com/bedrock/latest/userguide/kb-chunking-parsing.html#kb-advanced-parsing + */ +export abstract class ParsingStategy { + + // ------------------------------------------------------ + // FM Parsing Strategy + // ------------------------------------------------------ + /** + * Creates a Foundation Model-based parsing strategy for extracting non-textual information + * from documents such as tables and charts. + * - Additional costs apply when using advanced parsing due to foundation model usage. + * - There are limits on file types (PDF) and total data that can be parsed using advanced parsing. + * @see https://docs.aws.amazon.com/bedrock/latest/userguide/knowledge-base-ds.html#kb-ds-supported-doc-formats-limits + */ + public static foundationModel(props: FoundationModelParsingStategyProps): ParsingStategy { + class FoundationModelTransformation extends ParsingStategy { + /** The CloudFormation property representation of this configuration */ + public readonly configuration = { + bedrockFoundationModelConfiguration: { + modelArn: props.parsingModel.modelArn, + parsingPrompt: { + parsingPromptText: props.parsingPrompt ??
DEFAULT_PARSING_PROMPT, + }, + }, + parsingStrategy: ParsingStategyType.FOUNDATION_MODEL, + }; + + public generatePolicyStatements(): PolicyStatement[] { + return [new PolicyStatement({ + actions: ['bedrock:InvokeModel'], + resources: [props.parsingModel.modelArn], + })]; + } + }; + + return new FoundationModelTransformation(); + } + // ------------------------------------------------------ + // Properties + // ------------------------------------------------------ + /** The CloudFormation property representation of this configuration */ + public abstract configuration: CfnDataSource.ParsingConfigurationProperty; + + /** + * Returns the IAM policy statements required by this parsing strategy. + * The foundation-model strategy allows `bedrock:InvokeModel` on the parsing model ARN. + */ + public abstract generatePolicyStatements(): PolicyStatement[]; + + +} + diff --git a/src/cdk-lib/bedrock/data-sources/s3-data-source.ts b/src/cdk-lib/bedrock/data-sources/s3-data-source.ts new file mode 100644 index 00000000..294054b4 --- /dev/null +++ b/src/cdk-lib/bedrock/data-sources/s3-data-source.ts @@ -0,0 +1,141 @@ +/** + * Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. + * + * Licensed under the Apache License, Version 2.0 (the "License"). You may not use this file except in compliance + * with the License. A copy of the License is located at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * or in the 'license' file accompanying this file. This file is distributed on an 'AS IS' BASIS, WITHOUT WARRANTIES + * OR CONDITIONS OF ANY KIND, express or implied. See the License for the specific language governing permissions + * and limitations under the License.
+ */ +import { CfnDataSource } from 'aws-cdk-lib/aws-bedrock'; +import { IKey } from 'aws-cdk-lib/aws-kms'; +import { IBucket } from 'aws-cdk-lib/aws-s3'; +import { NagSuppressions } from 'cdk-nag'; +import { Construct } from 'constructs'; + +import { IKnowledgeBase } from './../knowledge-base'; +import { DataSourceAssociationProps, DataSourceNew, DataSourceType } from './base-data-source'; +import { generatePhysicalNameV2 } from '../../../common/helpers/utils'; + + +/** + * Interface to add a new S3 data source to an existing knowledge base (KB). + */ +export interface S3DataSourceAssociationProps extends DataSourceAssociationProps { + /** + * The bucket that contains the data source. + */ + readonly bucket: IBucket; + + /** + * The prefixes of the objects in the bucket that should be included in the data source. + * + * @default - All objects in the bucket. + */ + readonly inclusionPrefixes?: string[]; + +} + +/** + * Interface to create a new S3 Data Source object. + */ +export interface S3DataSourceProps extends S3DataSourceAssociationProps { + /** + * The knowledge base to associate with the data source. + */ + readonly knowledgeBase: IKnowledgeBase; +} + + +/** + * Sets up an S3 Data Source to be added to a knowledge base. + */ +export class S3DataSource extends DataSourceNew { + // ------------------------------------------------------ + // Common attributes for all new data sources + // ------------------------------------------------------ + /** + * The unique identifier of the data source. + * @example 'JHUEVXUZMU' + */ + public readonly dataSourceId: string; + /** + * The type of data source. + */ + public readonly dataSourceType: DataSourceType; + /** + * The name of the data source. A name is generated (prefix 's3-ds') when `dataSourceName` is not provided. + */ + public readonly dataSourceName: string; + /** + * The knowledge base associated with the data source. + */ + public readonly knowledgeBase: IKnowledgeBase; + /** + * The KMS key to use to encrypt the data source.
+ */ + public readonly kmsKey?: IKey; + // ------------------------------------------------------ + // Unique to this class + // ------------------------------------------------------ + /** + * The bucket associated with the data source. + */ + public readonly bucket: IBucket; + // ------------------------------------------------------ + // Internal Only + // ------------------------------------------------------ + /** + * The Data Source cfn resource. + */ + private readonly __resource: CfnDataSource; + + + constructor(scope: Construct, id: string, props: S3DataSourceProps) { + super(scope, id); + // Assign attributes + this.knowledgeBase = props.knowledgeBase; + this.dataSourceType = DataSourceType.S3; + this.dataSourceName = props.dataSourceName ?? generatePhysicalNameV2(this, 's3-ds', { maxLength: 40, lower: true, separator: '-' });; + this.bucket = props.bucket; + this.kmsKey = props.kmsKey; + + // ------------------------------------------------------ + // Manage permissions for the data source: the knowledge base role is granted read access to the bucket + // ------------------------------------------------------ + this.handleCommonPermissions(props); + this.bucket.grantRead(this.knowledgeBase.role); + + NagSuppressions.addResourceSuppressions( + this.knowledgeBase.role, + [{ + id: 'AwsSolutions-IAM5', + reason: 'The KB role needs read only access to all objects in the data source bucket.', + }], + true, + ); + + // ------------------------------------------------------ + // L1 Instantiation + // ------------------------------------------------------ + this.__resource = new CfnDataSource(this, 'DataSource', { + ...this.formatAsCfnProps( + props, + { + type: this.dataSourceType, + s3Configuration: { + bucketArn: props.bucket.bucketArn, + inclusionPrefixes: props.inclusionPrefixes, + }, + }, + ), + }); + + this.dataSourceId = this.__resource.attrDataSourceId; + + + } +} \ No newline at end of file diff --git a/src/cdk-lib/bedrock/data-sources/salesforce-data-source.ts
b/src/cdk-lib/bedrock/data-sources/salesforce-data-source.ts new file mode 100644 index 00000000..64c33ecd --- /dev/null +++ b/src/cdk-lib/bedrock/data-sources/salesforce-data-source.ts @@ -0,0 +1,217 @@ +/** + * Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. + * + * Licensed under the Apache License, Version 2.0 (the "License"). You may not use this file except in compliance + * with the License. A copy of the License is located at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * or in the 'license' file accompanying this file. This file is distributed on an 'AS IS' BASIS, WITHOUT WARRANTIES + * OR CONDITIONS OF ANY KIND, express or implied. See the License for the specific language governing permissions + * and limitations under the License. + */ + +import { CfnDataSource } from 'aws-cdk-lib/aws-bedrock'; +import { IKey } from 'aws-cdk-lib/aws-kms'; +import { ISecret } from 'aws-cdk-lib/aws-secretsmanager'; +import { Construct } from 'constructs'; + +import { IKnowledgeBase } from './../knowledge-base'; +import { DataSourceNew, DataSourceAssociationProps, DataSourceType } from './base-data-source'; +import { generatePhysicalNameV2 } from '../../../common/helpers/utils'; + + +/** + * Represents the authentication types available for connecting to a Salesforce data source. + */ +export enum SalesforceDataSourceAuthType { + /** + * Your secret authentication credentials in AWS Secrets Manager should include: + * - `consumerKey` (app client ID) + * - `consumerSecret` (client secret) + * - `authenticationUrl` + */ + OAUTH2_CLIENT_CREDENTIALS = 'OAUTH2_CLIENT_CREDENTIALS' +} + +/** + * Represents the Salesforce object types that can be accessed by the data source connector.
+ */ +export enum SalesforceObjectType { + ACCOUNT = 'Account', + ATTACHMENT = 'Attachment', + CAMPAIGN = 'Campaign', + CONTENT_VERSION = 'ContentVersion', + PARTNER = 'Partner', + PRICEBOOK_2 = 'Pricebook2', + CASE = 'Case', + CONTACT = 'Contact', + CONTRACT = 'Contract', + DOCUMENT = 'Document', + IDEA = 'Idea', + LEAD = 'Lead', + OPPORTUNITY = 'Opportunity', + PRODUCT_2 = 'Product2', + SOLUTION = 'Solution', + TASK = 'Task', + FEED_ITEM = 'FeedItem', + FEED_COMMENT = 'FeedComment', + KNOWLEDGE_KAV = 'Knowledge__kav', + USER = 'User', + COLLABORATION_GROUP = 'CollaborationGroup' +} + +/** + * Defines the crawling filters for Salesforce data ingestion. + */ +export interface SalesforceCrawlingFilters { + /** + * The Salesforce object type to which this filter applies. + */ + readonly objectType: SalesforceObjectType; + /** + * Regular expression patterns to include specific content. + */ + readonly includePatterns?: string[]; + /** + * Regular expression patterns to exclude specific content. + */ + readonly excludePatterns?: string[]; +} + +/** + * Interface to add a new Salesforce data source to an existing knowledge base (KB). + */ +export interface SalesforceDataSourceAssociationProps extends DataSourceAssociationProps { + /** + * The Salesforce host URL or instance URL. + * @example "https://company.salesforce.com/" + */ + readonly endpoint: string; + /** + * The AWS Secrets Manager secret that stores your authentication credentials + * for your Salesforce instance URL. Secret must start with "AmazonBedrock-". + */ + readonly authSecret: ISecret; + /** + * The filters (regular expression patterns) for the crawling. + * If there's a conflict, the exclude pattern takes precedence. + * @default None - all your content is crawled. + */ + readonly filters?: SalesforceCrawlingFilters[]; + +} + +/** + * Interface to create a new standalone Salesforce data source object.
+ */ +export interface SalesforceDataSourceProps extends SalesforceDataSourceAssociationProps { + /** + * The knowledge base to associate with the data source. + */ + readonly knowledgeBase: IKnowledgeBase; +} + +/** + * Sets up a Salesforce data source to be added to a knowledge base. + */ +export class SalesforceDataSource extends DataSourceNew { + // ------------------------------------------------------ + // Common attributes for all new data sources + // ------------------------------------------------------ + /** + * The unique identifier of the data source. + * @example 'JHUEVXUZMU' + */ + public readonly dataSourceId: string; + /** + * The type of data source. + */ + public readonly dataSourceType: DataSourceType; + /** + * The name of the data source. + */ + public readonly dataSourceName: string; + /** + * The knowledge base associated with the data source. + */ + public readonly knowledgeBase: IKnowledgeBase; + /** + * The KMS key to use to encrypt the data source. + */ + public readonly kmsKey?: IKey; + // ------------------------------------------------------ + // Unique to this class + // ------------------------------------------------------ + /** + * The Salesforce host URL or instance URL. + */ + public readonly endpoint: string; + /** + * The AWS Secrets Manager secret that stores your authentication credentials. + */ + public readonly authSecret: ISecret; + // ------------------------------------------------------ + // Internal Only + // ------------------------------------------------------ + /** + * The Data Source cfn resource. + */ + private readonly __resource: CfnDataSource; + + + constructor(scope: Construct, id: string, props: SalesforceDataSourceProps) { + super(scope, id); + // Assign attributes + this.knowledgeBase = props.knowledgeBase; + this.dataSourceType = DataSourceType.SALESFORCE; + this.dataSourceName = props.dataSourceName ??
generatePhysicalNameV2(this, 'sfdc-ds', { maxLength: 40, lower: true, separator: '-' });; + this.endpoint = props.endpoint; + this.authSecret = props.authSecret; + this.kmsKey = props.kmsKey; + + // ------------------------------------------------------ + // Manage permissions for the data source: the knowledge base role is granted read access to the auth secret + // ------------------------------------------------------ + this.handleCommonPermissions(props); + this.authSecret.grantRead(this.knowledgeBase.role); + + // ------------------------------------------------------ + + // ------------------------------------------------------ + // L1 Instantiation + // ------------------------------------------------------ + this.__resource = new CfnDataSource(this, 'DataSource', { + ...this.formatAsCfnProps( + props, + { + type: this.dataSourceType, + salesforceConfiguration: { + sourceConfiguration: { + authType: SalesforceDataSourceAuthType.OAUTH2_CLIENT_CREDENTIALS, + credentialsSecretArn: this.authSecret.secretArn, + hostUrl: this.endpoint, + }, + crawlerConfiguration: + (props.filters) ? ({ + filterConfiguration: { + type: 'PATTERN', + patternObjectFilter: { + filters: props.filters?.map(item => ({ + objectType: item.objectType, + inclusionFilters: item.includePatterns, + exclusionFilters: item.excludePatterns, + })), + }, + }, + }) : undefined, + }, + }, + ), + }); + + this.dataSourceId = this.__resource.attrDataSourceId; + + + } +} \ No newline at end of file diff --git a/src/cdk-lib/bedrock/data-sources/sharepoint-data-source.ts b/src/cdk-lib/bedrock/data-sources/sharepoint-data-source.ts new file mode 100644 index 00000000..2a13db04 --- /dev/null +++ b/src/cdk-lib/bedrock/data-sources/sharepoint-data-source.ts @@ -0,0 +1,234 @@ +/** + * Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. + * + * Licensed under the Apache License, Version 2.0 (the "License"). You may not use this file except in compliance + * with the License.
A copy of the License is located at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * or in the 'license' file accompanying this file. This file is distributed on an 'AS IS' BASIS, WITHOUT WARRANTIES + * OR CONDITIONS OF ANY KIND, express or implied. See the License for the specific language governing permissions + * and limitations under the License. + */ +import { CfnDataSource } from 'aws-cdk-lib/aws-bedrock'; +import { IKey } from 'aws-cdk-lib/aws-kms'; +import { ISecret } from 'aws-cdk-lib/aws-secretsmanager'; +import { Construct } from 'constructs'; + +import { IKnowledgeBase } from './../knowledge-base'; +import { DataSourceAssociationProps, DataSourceNew, DataSourceType } from './base-data-source'; +import { generatePhysicalNameV2 } from '../../../common/helpers/utils'; + +/** + * Represents the authentication types available for connecting to a SharePoint data source. + */ +export enum SharePointDataSourceAuthType { + /** + * OAuth 2.0 Client Credentials flow for authentication with SharePoint. + * Your secret authentication credentials in AWS Secrets Manager should include: + * - `username`: The admin username for SharePoint authentication + * - `password`: The admin password associated with the username + * - `clientId`: The client ID (also known as application ID) + * - `clientSecret`: The client secret + */ + OAUTH2_CLIENT_CREDENTIALS = 'OAUTH2_CLIENT_CREDENTIALS' +} + +/** + * Represents the SharePoint object types that can be accessed by the data source connector. + */ +export enum SharePointObjectType { + /** + * Represents a SharePoint page, which typically contains web parts and content. + */ + PAGE = 'Page', + + /** + * Represents a calendar event in SharePoint. + */ + EVENT = 'Event', + + /** + * Represents a file stored in SharePoint document libraries. + */ + FILE = 'File', +} + +/** + * Defines the crawling filters for SharePoint data ingestion.
These filters allow + * you to specify which content should be included or excluded during the crawling process. + * If you specify an inclusion and exclusion filter and both match a document, + * the exclusion filter takes precedence and the document isn’t crawled. + */ +export interface SharePointCrawlingFilters { + /** + * The SharePoint object type this filter applies to. + */ + readonly objectType: SharePointObjectType; + /** + * Optional array of regular expression patterns to include specific content. + * Only content matching these patterns will be crawled. + * @example ['.*public.*', '.*shared.*'] + */ + readonly includePatterns?: string[]; + /** + * Optional array of regular expression patterns to exclude specific content. + * Content matching these patterns will be skipped during crawling. + * @example ['.*private.*', '.*confidential.*'] + */ + readonly excludePatterns?: string[]; +} + +/** + * Interface to add a new SharePoint data source to an existing knowledge base (KB). + */ +export interface SharePointDataSourceAssociationProps extends DataSourceAssociationProps { + /** + * The domain of your SharePoint instance (the site URLs are passed separately in `siteUrls`). + * @example "yourdomain" + */ + readonly domain: string; + /** + * The SharePoint site URL/URLs. + * Must start with “https”. All URLs must start with same protocol. + * @example ["https://yourdomain.sharepoint.com/sites/mysite"] + */ + readonly siteUrls: string[]; + /** + * The identifier of your Microsoft 365 tenant. + * @example "d1c035a6-1dcf-457d-97e3" + */ + readonly tenantId: string; + /** + * The AWS Secrets Manager secret that stores your authentication credentials + * for your SharePoint instance URL. Secret must start with "AmazonBedrock-". + */ + readonly authSecret: ISecret; + /** + * The filters (regular expression patterns) for the crawling. + * If there's a conflict, the exclude pattern takes precedence. + * @default None - all your content is crawled.
+ */ + readonly filters?: SharePointCrawlingFilters[]; + +} + +/** + * Interface to create a new standalone SharePoint data source object. + */ +export interface SharePointDataSourceProps extends SharePointDataSourceAssociationProps { + /** + * The knowledge base to associate with the data source. + */ + readonly knowledgeBase: IKnowledgeBase; +} + +/** + * Sets up a SharePoint data source to be added to a knowledge base. + */ +export class SharePointDataSource extends DataSourceNew { + // ------------------------------------------------------ + // Common attributes for all new data sources + // ------------------------------------------------------ + /** + * The unique identifier of the data source. + * @example 'JHUEVXUZMU' + */ + public readonly dataSourceId: string; + /** + * The type of data source. + */ + public readonly dataSourceType: DataSourceType; + /** + * The name of the data source. + */ + public readonly dataSourceName: string; + /** + * The knowledge base associated with the data source. + */ + public readonly knowledgeBase: IKnowledgeBase; + /** + * The KMS key to use to encrypt the data source. + */ + public readonly kmsKey?: IKey; + // ------------------------------------------------------ + // Unique to this class + // ------------------------------------------------------ + /** + * The domain name of your SharePoint instance. + */ + public readonly domain: string; + /** + * The AWS Secrets Manager secret that stores your authentication credentials. + */ + public readonly authSecret: ISecret; + /** + * The SharePoint site URL/URLs. + */ + public readonly siteUrls: string[]; + // ------------------------------------------------------ + // Internal Only + // ------------------------------------------------------ + /** + * The Data Source cfn resource.
+ */ + private readonly __resource: CfnDataSource; + + + constructor(scope: Construct, id: string, props: SharePointDataSourceProps) { + super(scope, id); + // Assign attributes + this.knowledgeBase = props.knowledgeBase; + this.dataSourceType = DataSourceType.SHAREPOINT; + this.dataSourceName = props.dataSourceName ?? generatePhysicalNameV2(this, 'sharepoint-ds', { maxLength: 40, lower: true, separator: '-' });; + this.siteUrls = props.siteUrls; + this.domain = props.domain; + this.authSecret = props.authSecret; + this.kmsKey = props.kmsKey; + + // ------------------------------------------------------ + // Manage permissions for the data source + // ------------------------------------------------------ + this.handleCommonPermissions(props); + this.authSecret.grantRead(this.knowledgeBase.role); + + // ------------------------------------------------------ + // L1 Instantiation + // ------------------------------------------------------ + this.__resource = new CfnDataSource(this, 'DataSource', { + ...this.formatAsCfnProps( + props, + { + type: this.dataSourceType, + sharePointConfiguration: { + sourceConfiguration: { + authType: SharePointDataSourceAuthType.OAUTH2_CLIENT_CREDENTIALS, + credentialsSecretArn: this.authSecret.secretArn, + hostType: 'ONLINE', + domain: props.domain, + siteUrls: this.siteUrls, + tenantId: props.tenantId, + }, + crawlerConfiguration: + (props.filters) ?
({ + filterConfiguration: { + type: 'PATTERN', + patternObjectFilter: { + filters: props.filters?.map(item => ({ + objectType: item.objectType, + inclusionFilters: item.includePatterns, + exclusionFilters: item.excludePatterns, + })), + }, + }, + }) : undefined, + }, + }, + ), + }); + + this.dataSourceId = this.__resource.attrDataSourceId; + + + } +} \ No newline at end of file diff --git a/src/cdk-lib/bedrock/data-sources/web-crawler-data-source.ts b/src/cdk-lib/bedrock/data-sources/web-crawler-data-source.ts new file mode 100644 index 00000000..0df52387 --- /dev/null +++ b/src/cdk-lib/bedrock/data-sources/web-crawler-data-source.ts @@ -0,0 +1,190 @@ +/** + * Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. + * + * Licensed under the Apache License, Version 2.0 (the "License"). You may not use this file except in compliance + * with the License. A copy of the License is located at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * or in the 'license' file accompanying this file. This file is distributed on an 'AS IS' BASIS, WITHOUT WARRANTIES + * OR CONDITIONS OF ANY KIND, express or implied. See the License for the specific language governing permissions + * and limitations under the License. + */ +import { CfnDataSource } from 'aws-cdk-lib/aws-bedrock'; +import { IKey } from 'aws-cdk-lib/aws-kms'; +import { Construct } from 'constructs'; + +import { DataSourceNew, DataSourceAssociationProps, DataSourceType } from './base-data-source'; +import { generatePhysicalNameV2 } from '../../../common/helpers/utils'; +import { IKnowledgeBase } from '../knowledge-base'; + +/** + * The scope of the crawling. + */ +export enum CrawlingScope { + /** + * Crawls only web pages that belong to the same host or primary domain. + */ + HOST_ONLY = 'HOST_ONLY', + /** + * Includes subdomains in addition to the host or primary domain, i.e.
+ * web pages that contain "aws.amazon.com" can also include + * sub domain "docs.aws.amazon.com" + */ + SUBDOMAINS = 'SUBDOMAINS', + /** + * Limit crawling to web pages that belong to the same host and with the + * same initial URL path. + */ + DEFAULT = 'DEFAULT' +} + +/** + * The filters (regular expression patterns) to include or exclude in the crawling + * in accordance with your scope. + */ +export interface CrawlingFilters { + /** + * Include patterns (regular expressions). + */ + readonly includePatterns?: string[]; + /** + * Exclude patterns (regular expressions). + */ + readonly excludePatterns?: string[]; +} + +/** + * Interface to add a new web crawler data source to an existing knowledge base (KB). + */ +export interface WebCrawlerDataSourceAssociationProps extends DataSourceAssociationProps { + /** + * The source urls in the format `https://www.sitename.com`. + * Maximum of 100 URLs. + */ + readonly sourceUrls: string[]; + /** + * The scope of the crawling. + * @default - CrawlingScope.DEFAULT + */ + readonly crawlingScope?: CrawlingScope; + /** + * The max rate at which pages are crawled, up to 300 per minute per host. + * Higher values will decrease sync time but increase the load on the host. + * @default 300 + */ + readonly crawlingRate?: number; + /** + * The filters (regular expression patterns) for the crawling. + * If there's a conflict, the exclude pattern takes precedence. + * @default None + */ + readonly filters?: CrawlingFilters; +} + +/** + * Interface to create a new standalone web crawler data source object. + */ +export interface WebCrawlerDataSourceProps extends WebCrawlerDataSourceAssociationProps { + /** + * The knowledge base to associate with the data source. + */ + readonly knowledgeBase: IKnowledgeBase; +} + +/** + * Sets up a web crawler data source to be added to a knowledge base.
+ */ +export class WebCrawlerDataSource extends DataSourceNew { + // ------------------------------------------------------ + // Common attributes for all new data sources + // ------------------------------------------------------ + /** + * The unique identifier of the data source. + * @example 'JHUEVXUZMU' + */ + public readonly dataSourceId: string; + /** + * The type of data source. + */ + public readonly dataSourceType: DataSourceType; + /** + * The name of the data source. + */ + public readonly dataSourceName: string; + /** + * The knowledge base associated with the data source. + */ + public readonly knowledgeBase: IKnowledgeBase; + /** + * The KMS key to use to encrypt the data source. + */ + public readonly kmsKey?: IKey; + // ------------------------------------------------------ + // Unique to this class + // ------------------------------------------------------ + /** + * The source URLs to crawl, as supplied in `sourceUrls`. + */ + public readonly siteUrls: string[]; + /** + * The max rate at which pages are crawled. Set to 300 when `crawlingRate` is not provided. + */ + public readonly crawlingRate: number; + // ------------------------------------------------------ + // Internal Only + // ------------------------------------------------------ + /** + * The Data Source cfn resource. + */ + private readonly __resource: CfnDataSource; + + constructor(scope: Construct, id: string, props: WebCrawlerDataSourceProps) { + super(scope, id); + // Assign attributes + this.knowledgeBase = props.knowledgeBase; + this.dataSourceType = DataSourceType.WEB_CRAWLER; + this.dataSourceName = props.dataSourceName ?? generatePhysicalNameV2(this, 'crawler-ds', { maxLength: 40, lower: true, separator: '-' });; + this.kmsKey = props.kmsKey; + this.crawlingRate = props.crawlingRate ??
300; + this.siteUrls = props.sourceUrls; + + // ------------------------------------------------------ + // Manage permissions for the data source + // ------------------------------------------------------ + this.handleCommonPermissions(props); + + // ------------------------------------------------------ + // L1 Instantiation (scope is passed through only when it is set and is not CrawlingScope.DEFAULT) + // ------------------------------------------------------ + + this.__resource = new CfnDataSource(this, 'DataSource', { + ...this.formatAsCfnProps( + props, + { + type: this.dataSourceType, + webConfiguration: { + crawlerConfiguration: { + crawlerLimits: { + rateLimit: this.crawlingRate, + }, + scope: (props.crawlingScope !== CrawlingScope.DEFAULT) ? props.crawlingScope : undefined, //?? CrawlingScope.HOST_ONLY, + inclusionFilters: props.filters?.includePatterns, + exclusionFilters: props.filters?.excludePatterns, + + }, + sourceConfiguration: { + urlConfiguration: { + seedUrls: props.sourceUrls.map(item => ({ url: item })), + }, + }, + }, + }, + ), + }); + + this.dataSourceId = this.__resource.attrDataSourceId; + + + } +} diff --git a/src/cdk-lib/bedrock/index.ts b/src/cdk-lib/bedrock/index.ts index 34ccc036..1df04c57 100644 --- a/src/cdk-lib/bedrock/index.ts +++ b/src/cdk-lib/bedrock/index.ts @@ -13,7 +13,6 @@ export * from './models'; export * from './knowledge-base'; -export * from './s3-data-source'; export * from './agent'; export * from './agent-alias'; export * from './agent-action-group'; @@ -25,3 +24,12 @@ export * from './pii-list'; export * from './content-policy'; export * from './prompt'; export * from './prompt-version'; +export * from './data-sources/base-data-source'; +export * from './data-sources/chunking'; +export * from './data-sources/parsing'; +export * from './data-sources/custom-transformation'; +export * from './data-sources/web-crawler-data-source'; +export * from './data-sources/sharepoint-data-source'; +export * from './data-sources/confluence-data-source'; +export * from
'./data-sources/salesforce-data-source'; +export * from './data-sources/s3-data-source'; \ No newline at end of file diff --git a/src/cdk-lib/bedrock/knowledge-base.ts b/src/cdk-lib/bedrock/knowledge-base.ts index 95fd9446..cd479d98 100644 --- a/src/cdk-lib/bedrock/knowledge-base.ts +++ b/src/cdk-lib/bedrock/knowledge-base.ts @@ -11,19 +11,19 @@ * and limitations under the License. */ -import * as cdk from 'aws-cdk-lib'; -import { aws_bedrock as bedrock } from 'aws-cdk-lib'; +import { ArnFormat, aws_bedrock as bedrock, IResource, Resource, Stack } from 'aws-cdk-lib'; import * as iam from 'aws-cdk-lib/aws-iam'; import { NagSuppressions } from 'cdk-nag/lib/nag-suppressions'; import { Construct } from 'constructs'; import { Agent } from './../bedrock/agent'; +import { ConfluenceDataSource, ConfluenceDataSourceAssociationProps } from './data-sources/confluence-data-source'; +import { S3DataSource, S3DataSourceAssociationProps } from './data-sources/s3-data-source'; +import { SalesforceDataSource, SalesforceDataSourceAssociationProps } from './data-sources/salesforce-data-source'; +import { SharePointDataSource, SharePointDataSourceAssociationProps } from './data-sources/sharepoint-data-source'; +import { WebCrawlerDataSource, WebCrawlerDataSourceAssociationProps } from './data-sources/web-crawler-data-source'; import { BedrockFoundationModel } from './models'; import { generatePhysicalNameV2 } from '../../common/helpers/utils'; -import { - AmazonAuroraDefaultVectorStore, - AmazonAuroraVectorStore, -} from '../amazonaurora'; - +import { AmazonAuroraDefaultVectorStore, AmazonAuroraVectorStore } from '../amazonaurora'; import { VectorIndex } from '../opensearch-vectorindex'; import { VectorCollection } from '../opensearchserverless'; import { PineconeVectorStore } from '../pinecone'; @@ -93,6 +93,97 @@ interface StorageConfiguration { metadataField: string; } +/** + * Represents a Knowledge Base, either created with CDK or imported. 
+ */ +export interface IKnowledgeBase extends IResource { + /** + * The ARN of the knowledge base. + * @example "arn:aws:bedrock:us-east-1:123456789012:knowledge-base/KB12345678" + */ + readonly knowledgeBaseArn: string; + + /** + * The ID of the knowledge base. + * @example "KB12345678" + */ + readonly knowledgeBaseId: string; + + /** + * The role associated with the knowledge base. + */ + readonly role: iam.IRole; + + /** + * Add an S3 data source to the knowledge base. + */ + addS3DataSource(props: S3DataSourceAssociationProps): S3DataSource; + + /** + * Add a web crawler data source to the knowledge base. + */ + addWebCrawlerDataSource(props: WebCrawlerDataSourceAssociationProps): WebCrawlerDataSource; + + /** + * Add a SharePoint data source to the knowledge base. + */ + addSharePointDataSource(props: SharePointDataSourceAssociationProps): SharePointDataSource; + + /** + * Add a Confluence data source to the knowledge base. + */ + addConfluenceDataSource(props: ConfluenceDataSourceAssociationProps): ConfluenceDataSource; + + /** + * Add a Salesforce data source to the knowledge base. + */ + addSalesforceDataSource(props: SalesforceDataSourceAssociationProps): SalesforceDataSource; +} + +/** + * Abstract base class for Knowledge Base. + * Contains methods valid for KBs either created with CDK or imported. 
+ */ +abstract class KnowledgeBaseBase extends Resource implements IKnowledgeBase { + public abstract readonly knowledgeBaseArn: string; + public abstract readonly knowledgeBaseId: string; + public abstract readonly role: iam.IRole; + + constructor(scope: Construct, id: string) { super(scope, id); } + // ------------------------------------------------------ + // Helper methods to add Data Sources + // ------------------------------------------------------ + public addS3DataSource(props: S3DataSourceAssociationProps): S3DataSource { + return new S3DataSource(this, `s3-${props.bucket.node.addr}`, { + knowledgeBase: this, ...props, + }); + } + public addWebCrawlerDataSource(props: WebCrawlerDataSourceAssociationProps): WebCrawlerDataSource { + const url = new URL(props.sourceUrls[0]); + return new WebCrawlerDataSource(this, `web-${url.hostname.replace('.', '-')}`, { + knowledgeBase: this, ...props, + }); + } + public addSharePointDataSource(props: SharePointDataSourceAssociationProps): SharePointDataSource { + const url = new URL(props.siteUrls[0]); + return new SharePointDataSource(this, `sp-${url.hostname.replace('.', '-')}`, { + knowledgeBase: this, ...props, + }); + } + public addConfluenceDataSource(props: ConfluenceDataSourceAssociationProps): ConfluenceDataSource { + const url = new URL(props.confluenceUrl); + return new ConfluenceDataSource(this, `cf-${url.hostname.replace('.', '-')}`, { + knowledgeBase: this, ...props, + }); + } + public addSalesforceDataSource(props: SalesforceDataSourceAssociationProps): SalesforceDataSource { + const url = new URL(props.endpoint); + return new SalesforceDataSource(this, `sf-${url.hostname.replace('.', '-')}`, { + knowledgeBase: this, ...props, + }); + } +} + /** * Properties for a knowledge base */ @@ -121,7 +212,7 @@ export interface KnowledgeBaseProps { * this role will be able to invoke or use the * specified embeddings model within the Bedrock service. 
*/ - readonly existingRole?: iam.Role; + readonly existingRole?: iam.IRole; /** * A narrative description of the knowledge base. @@ -188,12 +279,49 @@ export interface KnowledgeBaseProps { readonly tags?: Record; } +/** + * Properties for importing a knowledge base outside of this stack + */ +export interface KnowledgeBaseAttributes { + /** + * The ID of the knowledge base. + * @example "KB12345678" + */ + readonly knowledgeBaseId: string; + /** + * The Service Execution Role associated with the knowledge base. + * @example "arn:aws:iam::123456789012:role/AmazonBedrockExecutionRoleForKnowledgeBaseawscdkbdgeBaseKB12345678" + */ + readonly executionRoleArn: string; +} + /** * Deploys a Bedrock Knowledge Base and configures a backend by OpenSearch Serverless, * Pinecone, Redis Enterprise Cloud or Amazon Aurora PostgreSQL. * */ -export class KnowledgeBase extends Construct { +export class KnowledgeBase extends KnowledgeBaseBase { + // ------------------------------------------------------ + // Import Methods + // ------------------------------------------------------ + public static fromKnowledgeBaseAttributes(scope: Construct, id: string, attrs: KnowledgeBaseAttributes): IKnowledgeBase { + const stack = Stack.of(scope); + class Import extends KnowledgeBaseBase { + public readonly knowledgeBaseArn = stack.formatArn({ + service: 'bedrock', + resource: 'knowledge-base', + resourceName: attrs.knowledgeBaseId, + arnFormat: ArnFormat.SLASH_RESOURCE_NAME, + }); + public readonly role = iam.Role.fromRoleArn(this, `kb-${attrs.knowledgeBaseId}-role`, attrs.executionRoleArn); + public readonly knowledgeBaseId = attrs.knowledgeBaseId; + } + return new Import(scope, id); + + } + // ------------------------------------------------------ + // Attributes + // ------------------------------------------------------ /** * The name of the knowledge base. 
*/ @@ -207,7 +335,7 @@ export class KnowledgeBase extends Construct { /** * The role the Knowledge Base uses to access the vector store and data source. */ - public readonly role: iam.Role; + public readonly role: iam.IRole; /** * The vector store for the knowledge base. @@ -232,6 +360,7 @@ export class KnowledgeBase extends Construct { * The ID of the knowledge base. */ public readonly knowledgeBaseId: string; + /** * The OpenSearch vector index for the knowledge base. * @private @@ -291,29 +420,22 @@ export class KnowledgeBase extends Construct { ); this.role = new iam.Role(this, 'Role', { roleName: roleName, - assumedBy: new iam.ServicePrincipal('bedrock.amazonaws.com'), - }); - this.role.assumeRolePolicy!.addStatements( - new iam.PolicyStatement({ - actions: ['sts:AssumeRole'], - principals: [new iam.ServicePrincipal('bedrock.amazonaws.com')], + assumedBy: new iam.ServicePrincipal('bedrock.amazonaws.com', { conditions: { - StringEquals: { - 'aws:SourceAccount': cdk.Stack.of(this).account, - }, + StringEquals: { 'aws:SourceAccount': Stack.of(this).account }, ArnLike: { - 'aws:SourceArn': cdk.Stack.of(this).formatArn({ + 'aws:SourceArn': Stack.of(this).formatArn({ service: 'bedrock', resource: 'knowledge-base', resourceName: '*', - arnFormat: cdk.ArnFormat.SLASH_RESOURCE_NAME, + arnFormat: ArnFormat.SLASH_RESOURCE_NAME, }), }, }, }), - ); + }); - this.role.addToPolicy( + this.role.addToPrincipalPolicy( new iam.PolicyStatement({ actions: ['bedrock:InvokeModel'], resources: [embeddingsModel.asArn(this)], @@ -358,7 +480,7 @@ export class KnowledgeBase extends Construct { * other than OpenSearch Serverless. 
*/ if (!(this.vectorStore instanceof VectorCollection)) { - this.role.addToPolicy( + this.role.addToPrincipalPolicy( new iam.PolicyStatement({ actions: ['secretsmanager:GetSecretValue'], resources: [this.vectorStore.credentialsSecretArn], @@ -377,7 +499,7 @@ export class KnowledgeBase extends Construct { this.vectorStore instanceof AmazonAuroraDefaultVectorStore || this.vectorStore instanceof AmazonAuroraVectorStore ) { - this.role.addToPolicy( + this.role.addToPrincipalPolicy( new iam.PolicyStatement({ actions: [ 'rds-data:ExecuteStatement', @@ -495,11 +617,11 @@ export class KnowledgeBase extends Construct { 'bedrock:TagResource', ], resources: [ - cdk.Stack.of(this).formatArn({ + Stack.of(this).formatArn({ service: 'bedrock', resource: 'knowledge-base', resourceName: '*', - arnFormat: cdk.ArnFormat.SLASH_RESOURCE_NAME, + arnFormat: ArnFormat.SLASH_RESOURCE_NAME, }), ], }), diff --git a/src/cdk-lib/bedrock/s3-data-source.ts b/src/cdk-lib/bedrock/s3-data-source.ts deleted file mode 100644 index 569250d3..00000000 --- a/src/cdk-lib/bedrock/s3-data-source.ts +++ /dev/null @@ -1,219 +0,0 @@ -/** - * Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. - * - * Licensed under the Apache License, Version 2.0 (the "License"). You may not use this file except in compliance - * with the License. A copy of the License is located at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * or in the 'license' file accompanying this file. This file is distributed on an 'AS IS' BASIS, WITHOUT WARRANTIES - * OR CONDITIONS OF ANY KIND, express or implied. See the License for the specific language governing permissions - * and limitations under the License. 
- */ - -import { aws_bedrock as bedrock } from 'aws-cdk-lib'; -import * as kms from 'aws-cdk-lib/aws-kms'; -import * as s3 from 'aws-cdk-lib/aws-s3'; -import { NagSuppressions } from 'cdk-nag'; -import { Construct } from 'constructs'; - -import { KnowledgeBase } from './knowledge-base'; - -const CHUNKING_OVERLAP = 20; -const CHUNKING_MAX_TOKENS = 300; - -/** - * Knowledge base can split your source data into chunks. A chunk refers to an - * excerpt from a data source that is returned when the knowledge base that it - * belongs to is queried. You have the following options for chunking your - * data. If you opt for NONE, then you may want to pre-process your files by - * splitting them up such that each file corresponds to a chunk. - */ -export enum ChunkingStrategy { - /** - * Amazon Bedrock splits your source data into chunks of the approximate size - * that you set in the `fixedSizeChunkingConfiguration`. - */ - FIXED_SIZE = 'FIXED_SIZE', - /** - * `FIXED_SIZE` with the default chunk size of 300 tokens and 20% overlap. - * If default is selected, chunk size and overlap set by the user will be - * ignored. - */ - DEFAULT = 'DEFAULT', - /** - * Amazon Bedrock treats each file as one chunk. If you choose this option, - * you may want to pre-process your documents by splitting them into separate - * files. - */ - NONE = 'NONE', -} - -/** - * Properties for an S3 Data Source. - */ -export interface S3DataSourceProps { - /** - * The knowledge base that this data source belongs to. - */ - readonly knowledgeBase: KnowledgeBase; - /** - * The name of the data source. - */ - readonly dataSourceName: string; - /** - * The bucket that contains the data source. - */ - readonly bucket: s3.IBucket; - /** - * The prefixes of the objects in the bucket that should be included in the data source. - * - * @default - All objects in the bucket. - */ - readonly inclusionPrefixes?: string[]; - /** - * The chunking strategy to use. 
- * - * @default ChunkingStrategy.DEFAULT - */ - readonly chunkingStrategy?: ChunkingStrategy; - /** - * The maximum number of tokens to use in a chunk. - * - * @default 300 - */ - readonly maxTokens?: number; - /** - * The percentage of overlap to use in a chunk. - * - * @default 20 - */ - readonly overlapPercentage?: number; - /** - * The KMS key to use to encrypt the data source. - * - * @default Amazon Bedrock encrypts your data with a key that AWS owns and manages - */ - readonly kmsKey?: kms.IKey; -} - -/** - * Sets up a data source to be added to a knowledge base. - */ -export class S3DataSource extends Construct { - /** - * The Data Source cfn resource. - */ - public readonly dataSource: bedrock.CfnDataSource; - /** - * The unique identifier of the data source. - */ - public readonly dataSourceId: string; - - constructor(scope: Construct, id: string, props: S3DataSourceProps) { - super(scope, id); - const knowledgeBase = props.knowledgeBase; - const dataSourceName = props.dataSourceName; - const bucket = props.bucket; - const inclusionPrefixes = props.inclusionPrefixes; - const chunkingStrategy = props.chunkingStrategy ?? ChunkingStrategy.DEFAULT; - const maxTokens = props.maxTokens ?? CHUNKING_MAX_TOKENS; - const overlapPercentage = props.overlapPercentage ?? 
CHUNKING_OVERLAP; - const kmsKey = props.kmsKey; - - - bucket.grantRead(knowledgeBase.role); - NagSuppressions.addResourceSuppressions( - knowledgeBase.role, - [ - { - id: 'AwsSolutions-IAM5', - reason: 'The KB role needs read only access to all objects in the data source bucket.', - }, - ], - true, - ); - - this.dataSource = new bedrock.CfnDataSource(this, 'DataSource', { - knowledgeBaseId: knowledgeBase.knowledgeBaseId, - name: dataSourceName, - dataSourceConfiguration: { - type: 'S3', - s3Configuration: { - bucketArn: bucket.bucketArn, - inclusionPrefixes: inclusionPrefixes, - }, - }, - vectorIngestionConfiguration: vectorIngestionConfiguration( - chunkingStrategy, maxTokens, overlapPercentage, - ), - serverSideEncryptionConfiguration: kmsKey ? { - kmsKeyArn: kmsKey.keyArn, - } : undefined, - - }); - - this.dataSourceId = this.dataSource.attrDataSourceId; - } -} - -interface FixedSizeChunkingConfig { - maxTokens: number; - overlapPercentage: number; -} - -interface ChunkingConfig { - chunkingStrategy: ChunkingStrategy; - fixedSizeChunkingConfiguration?: FixedSizeChunkingConfig; -} - -interface VectorIngestionConfig { - chunkingConfiguration?: ChunkingConfig; -} - -function vectorIngestionConfiguration( - chunkingStrategy: ChunkingStrategy, - maxTokens: number = CHUNKING_MAX_TOKENS, - overlapPercentage: number = CHUNKING_OVERLAP, -): VectorIngestionConfig { - - if (chunkingStrategy === ChunkingStrategy.FIXED_SIZE) { - if (maxTokens <= 20 || maxTokens >= 8000) { - throw new Error(`maxTokens must be between 20 and 8000, got ${maxTokens}`); - } - - if (overlapPercentage < 0 || overlapPercentage > 100) { - throw new Error(`overlapPercentage must be between 0 and 100, got ${overlapPercentage}`); - } - - return { - chunkingConfiguration: { - chunkingStrategy: chunkingStrategy, - fixedSizeChunkingConfiguration: { - maxTokens, - overlapPercentage, - }, - }, - }; - - } else if (chunkingStrategy === ChunkingStrategy.NONE) { - - return { - chunkingConfiguration: { - 
chunkingStrategy, - }, - }; - - } else { // DEFAULT - return { - chunkingConfiguration: { - chunkingStrategy: ChunkingStrategy.FIXED_SIZE, - fixedSizeChunkingConfiguration: { - maxTokens: CHUNKING_MAX_TOKENS, - overlapPercentage: CHUNKING_OVERLAP, - }, - }, - }; - } - -} diff --git a/test/cdk-lib/bedrock/data-sources/other-data-sources.test.ts b/test/cdk-lib/bedrock/data-sources/other-data-sources.test.ts new file mode 100644 index 00000000..789825aa --- /dev/null +++ b/test/cdk-lib/bedrock/data-sources/other-data-sources.test.ts @@ -0,0 +1,517 @@ +/** + * Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. + * + * Licensed under the Apache License, Version 2.0 (the "License"). You may not use this file except in compliance + * with the License. A copy of the License is located at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * or in the 'license' file accompanying this file. This file is distributed on an 'AS IS' BASIS, WITHOUT WARRANTIES + * OR CONDITIONS OF ANY KIND, express or implied. See the License for the specific language governing permissions + * and limitations under the License. 
+ */ + +import { App, Stack } from 'aws-cdk-lib'; +import { Match, Template } from 'aws-cdk-lib/assertions'; +import { IKey, Key } from 'aws-cdk-lib/aws-kms'; +import { ISecret, Secret } from 'aws-cdk-lib/aws-secretsmanager'; +import * as bedrock from '../../../../src/cdk-lib/bedrock'; + + +describe('Data Source', () => { + let stack: Stack; + let kb: bedrock.KnowledgeBase; + let key: IKey; + + beforeEach(() => { + const app = new App(); + stack = new Stack(app, 'TestStack'); + kb = new bedrock.KnowledgeBase(stack, 'KB', { + embeddingsModel: bedrock.BedrockFoundationModel.TITAN_EMBED_TEXT_V1, + }); + const sampleKeyArn = 'arn:aws:kms:eu-central-1:123456789012:key/06484191-7d55-49fb-9be7-0baaf7fe8418'; + key = Key.fromKeyArn(stack, 'TestKey', sampleKeyArn); + + }); + + test('Basic Web Crawler', () => { + kb.addWebCrawlerDataSource; + new bedrock.WebCrawlerDataSource(stack, 'TestDataSource', { + knowledgeBase: kb, + dataSourceName: 'TestDataSource', + sourceUrls: ['https://example.com'], + crawlingScope: bedrock.CrawlingScope.SUBDOMAINS, + crawlingRate: 250, + kmsKey: key, + }); + + Template.fromStack(stack).hasResourceProperties('AWS::Bedrock::DataSource', { + Name: 'TestDataSource', + ServerSideEncryptionConfiguration: { + KmsKeyArn: 'arn:aws:kms:eu-central-1:123456789012:key/06484191-7d55-49fb-9be7-0baaf7fe8418', + }, + DataSourceConfiguration: { + Type: 'WEB', + WebConfiguration: { + CrawlerConfiguration: { + CrawlerLimits: { + RateLimit: 250, + }, + Scope: 'SUBDOMAINS', + ExclusionFilters: Match.absent(), + InclusionFilters: Match.absent(), + }, + SourceConfiguration: { + UrlConfiguration: { + SeedUrls: [{ + Url: 'https://example.com', + }], + }, + }, + }, + }, + }); + }); +}); + +describe('Third Party Data Source', () => { + let stack: Stack; + let kb: bedrock.KnowledgeBase; + let key: IKey; + let secret: ISecret; + + beforeEach(() => { + const app = new App(); + stack = new Stack(app, 'TestStack'); + kb = new bedrock.KnowledgeBase(stack, 'KB', { + 
embeddingsModel: bedrock.BedrockFoundationModel.TITAN_EMBED_TEXT_V1, + }); + const sampleKeyArn = 'arn:aws:kms:eu-central-1:123456789012:key/06484191-7d55-49fb-9be7-0baaf7fe8418'; + const sampleSecretArn = 'arn:aws:secretsmanager:eu-central-1:123456789012:secret:AmazonBedrock-auth-tW8BY1'; + key = Key.fromKeyArn(stack, 'TestKey', sampleKeyArn); + secret = Secret.fromSecretCompleteArn(stack, 'TestSecret', sampleSecretArn); + }); + + test('Basic Confluence Setup - Class', () => { + new bedrock.ConfluenceDataSource(stack, 'TestDataSource', { + knowledgeBase: kb, + dataSourceName: 'TestDataSource', + authSecret: secret, + kmsKey: key, + confluenceUrl: 'https://example.atlassian.net', + filters: [ + { + objectType: bedrock.ConfluenceObjectType.ATTACHMENT, + includePatterns: ['.*\\.pdf'], + excludePatterns: ['.*private.*\\.pdf'], + }, + { + objectType: bedrock.ConfluenceObjectType.PAGE, + includePatterns: ['.*public.*\\.pdf'], + excludePatterns: ['.*confidential.*\\.pdf'], + }, + ], + }); + + console.log(Template.fromStack(stack)); + Template.fromStack(stack).hasResourceProperties('AWS::Bedrock::DataSource', { + Name: 'TestDataSource', + ServerSideEncryptionConfiguration: { + KmsKeyArn: 'arn:aws:kms:eu-central-1:123456789012:key/06484191-7d55-49fb-9be7-0baaf7fe8418', + }, + DataSourceConfiguration: { + Type: 'CONFLUENCE', + ConfluenceConfiguration: { + SourceConfiguration: { + HostUrl: 'https://example.atlassian.net', + HostType: 'SAAS', + AuthType: 'OAUTH2_CLIENT_CREDENTIALS', + CredentialsSecretArn: 'arn:aws:secretsmanager:eu-central-1:123456789012:secret:AmazonBedrock-auth-tW8BY1', + }, + CrawlerConfiguration: { + FilterConfiguration: { + Type: 'PATTERN', + PatternObjectFilter: { + Filters: [ + { + ObjectType: 'Attachment', + InclusionFilters: [ + '.*\\.pdf', + ], + ExclusionFilters: [ + '.*private.*\\.pdf', + ], + }, + { + ObjectType: 'Page', + InclusionFilters: [ + '.*public.*\\.pdf', + ], + ExclusionFilters: [ + '.*confidential.*\\.pdf', + ], + }, + ], + }, + }, + 
}, + }, + }, + }); + + }); + + test('Basic Confluence Setup - Method', () => { + kb.addConfluenceDataSource({ + dataSourceName: 'TestDataSource', + authSecret: secret, + kmsKey: key, + confluenceUrl: 'https://example.atlassian.net', + filters: [ + { + objectType: bedrock.ConfluenceObjectType.ATTACHMENT, + includePatterns: ['.*\\.pdf'], + excludePatterns: ['.*private.*\\.pdf'], + }, + { + objectType: bedrock.ConfluenceObjectType.PAGE, + includePatterns: ['.*public.*\\.pdf'], + excludePatterns: ['.*confidential.*\\.pdf'], + }, + ], + }); + + console.log(Template.fromStack(stack)); + Template.fromStack(stack).hasResourceProperties('AWS::Bedrock::DataSource', { + Name: 'TestDataSource', + ServerSideEncryptionConfiguration: { + KmsKeyArn: 'arn:aws:kms:eu-central-1:123456789012:key/06484191-7d55-49fb-9be7-0baaf7fe8418', + }, + DataSourceConfiguration: { + Type: 'CONFLUENCE', + ConfluenceConfiguration: { + SourceConfiguration: { + HostUrl: 'https://example.atlassian.net', + HostType: 'SAAS', + AuthType: 'OAUTH2_CLIENT_CREDENTIALS', + CredentialsSecretArn: 'arn:aws:secretsmanager:eu-central-1:123456789012:secret:AmazonBedrock-auth-tW8BY1', + }, + CrawlerConfiguration: { + FilterConfiguration: { + Type: 'PATTERN', + PatternObjectFilter: { + Filters: [ + { + ObjectType: 'Attachment', + InclusionFilters: [ + '.*\\.pdf', + ], + ExclusionFilters: [ + '.*private.*\\.pdf', + ], + }, + { + ObjectType: 'Page', + InclusionFilters: [ + '.*public.*\\.pdf', + ], + ExclusionFilters: [ + '.*confidential.*\\.pdf', + ], + }, + ], + }, + }, + }, + }, + }, + }); + + }); + + test('Basic Sharepoint Setup - Class', () => { + new bedrock.SharePointDataSource(stack, 'TestDataSource', { + knowledgeBase: kb, + dataSourceName: 'SharepointDataSource', + authSecret: secret, + kmsKey: key, + domain: 'yourdomain', + siteUrls: ['https://yourdomain.sharepoint.com/sites/mysite'], + tenantId: '888d0b57-69f1-4fb8-957f-e1f0bedf64de', + filters: [ + { + objectType: bedrock.SharePointObjectType.PAGE, + 
includePatterns: ['.*\\.pdf'], + excludePatterns: ['.*private.*\\.pdf'], + }, + { + objectType: bedrock.SharePointObjectType.FILE, + includePatterns: ['.*public.*\\.pdf'], + excludePatterns: ['.*confidential.*\\.pdf'], + }, + ], + }); + + console.log(Template.fromStack(stack)); + Template.fromStack(stack).hasResourceProperties('AWS::Bedrock::DataSource', { + Name: 'SharepointDataSource', + ServerSideEncryptionConfiguration: { + KmsKeyArn: 'arn:aws:kms:eu-central-1:123456789012:key/06484191-7d55-49fb-9be7-0baaf7fe8418', + }, + DataSourceConfiguration: { + Type: 'SHAREPOINT', + SharePointConfiguration: { + SourceConfiguration: { + TenantId: '888d0b57-69f1-4fb8-957f-e1f0bedf64de', + HostType: 'ONLINE', + Domain: 'yourdomain', + SiteUrls: [ + 'https://yourdomain.sharepoint.com/sites/mysite', + ], + AuthType: 'OAUTH2_CLIENT_CREDENTIALS', + CredentialsSecretArn: 'arn:aws:secretsmanager:eu-central-1:123456789012:secret:AmazonBedrock-auth-tW8BY1', + }, + CrawlerConfiguration: { + FilterConfiguration: { + Type: 'PATTERN', + PatternObjectFilter: { + Filters: [ + { + ObjectType: 'Page', + InclusionFilters: [ + '.*\\.pdf', + ], + ExclusionFilters: [ + '.*private.*\\.pdf', + ], + }, + { + ObjectType: 'File', + InclusionFilters: [ + '.*public.*\\.pdf', + ], + ExclusionFilters: [ + '.*confidential.*\\.pdf', + ], + }, + ], + }, + }, + }, + }, + }, + }); + + }); + + test('Basic Sharepoint Setup - Method', () => { + kb.addSharePointDataSource({ + dataSourceName: 'SharepointDataSource', + authSecret: secret, + kmsKey: key, + domain: 'yourdomain', + siteUrls: ['https://yourdomain.sharepoint.com/sites/mysite'], + tenantId: '888d0b57-69f1-4fb8-957f-e1f0bedf64de', + filters: [ + { + objectType: bedrock.SharePointObjectType.PAGE, + includePatterns: ['.*\\.pdf'], + excludePatterns: ['.*private.*\\.pdf'], + }, + { + objectType: bedrock.SharePointObjectType.FILE, + includePatterns: ['.*public.*\\.pdf'], + excludePatterns: ['.*confidential.*\\.pdf'], + }, + ], + }); + + 
console.log(Template.fromStack(stack)); + Template.fromStack(stack).hasResourceProperties('AWS::Bedrock::DataSource', { + Name: 'SharepointDataSource', + ServerSideEncryptionConfiguration: { + KmsKeyArn: 'arn:aws:kms:eu-central-1:123456789012:key/06484191-7d55-49fb-9be7-0baaf7fe8418', + }, + DataSourceConfiguration: { + Type: 'SHAREPOINT', + SharePointConfiguration: { + SourceConfiguration: { + TenantId: '888d0b57-69f1-4fb8-957f-e1f0bedf64de', + HostType: 'ONLINE', + Domain: 'yourdomain', + SiteUrls: [ + 'https://yourdomain.sharepoint.com/sites/mysite', + ], + AuthType: 'OAUTH2_CLIENT_CREDENTIALS', + CredentialsSecretArn: 'arn:aws:secretsmanager:eu-central-1:123456789012:secret:AmazonBedrock-auth-tW8BY1', + }, + CrawlerConfiguration: { + FilterConfiguration: { + Type: 'PATTERN', + PatternObjectFilter: { + Filters: [ + { + ObjectType: 'Page', + InclusionFilters: [ + '.*\\.pdf', + ], + ExclusionFilters: [ + '.*private.*\\.pdf', + ], + }, + { + ObjectType: 'File', + InclusionFilters: [ + '.*public.*\\.pdf', + ], + ExclusionFilters: [ + '.*confidential.*\\.pdf', + ], + }, + ], + }, + }, + }, + }, + }, + }); + + }); + + test('Basic SFDC Setup - Class', () => { + new bedrock.SalesforceDataSource(stack, 'TestDataSource', { + knowledgeBase: kb, + dataSourceName: 'SalesforceDataSource', + authSecret: secret, + kmsKey: key, + endpoint: 'https://company.salesforce.com/', + filters: [ + { + objectType: bedrock.SalesforceObjectType.CAMPAIGN, + includePatterns: ['.*\\.pdf'], + excludePatterns: ['.*private.*\\.pdf'], + }, + { + objectType: bedrock.SalesforceObjectType.CONTRACT, + includePatterns: ['.*public.*\\.pdf'], + excludePatterns: ['.*confidential.*\\.pdf'], + }, + ], + }); + + console.log(Template.fromStack(stack)); + Template.fromStack(stack).hasResourceProperties('AWS::Bedrock::DataSource', { + Name: 'SalesforceDataSource', + ServerSideEncryptionConfiguration: { + KmsKeyArn: 'arn:aws:kms:eu-central-1:123456789012:key/06484191-7d55-49fb-9be7-0baaf7fe8418', + }, + 
DataSourceConfiguration: { + Type: 'SALESFORCE', + SalesforceConfiguration: { + SourceConfiguration: { + HostUrl: 'https://company.salesforce.com/', + AuthType: 'OAUTH2_CLIENT_CREDENTIALS', + CredentialsSecretArn: 'arn:aws:secretsmanager:eu-central-1:123456789012:secret:AmazonBedrock-auth-tW8BY1', + }, + CrawlerConfiguration: { + FilterConfiguration: { + Type: 'PATTERN', + PatternObjectFilter: { + Filters: [ + { + ObjectType: 'Campaign', + InclusionFilters: [ + '.*\\.pdf', + ], + ExclusionFilters: [ + '.*private.*\\.pdf', + ], + }, + { + ObjectType: 'Contract', + InclusionFilters: [ + '.*public.*\\.pdf', + ], + ExclusionFilters: [ + '.*confidential.*\\.pdf', + ], + }, + ], + }, + }, + }, + }, + }, + }); + + }); + + test('Basic SFDC Setup - Method', () => { + kb.addSalesforceDataSource({ + dataSourceName: 'SalesforceDataSource', + authSecret: secret, + kmsKey: key, + endpoint: 'https://company.salesforce.com/', + filters: [ + { + objectType: bedrock.SalesforceObjectType.CAMPAIGN, + includePatterns: ['.*\\.pdf'], + excludePatterns: ['.*private.*\\.pdf'], + }, + { + objectType: bedrock.SalesforceObjectType.CONTRACT, + includePatterns: ['.*public.*\\.pdf'], + excludePatterns: ['.*confidential.*\\.pdf'], + }, + ], + }); + + console.log(Template.fromStack(stack)); + Template.fromStack(stack).hasResourceProperties('AWS::Bedrock::DataSource', { + Name: 'SalesforceDataSource', + ServerSideEncryptionConfiguration: { + KmsKeyArn: 'arn:aws:kms:eu-central-1:123456789012:key/06484191-7d55-49fb-9be7-0baaf7fe8418', + }, + DataSourceConfiguration: { + Type: 'SALESFORCE', + SalesforceConfiguration: { + SourceConfiguration: { + HostUrl: 'https://company.salesforce.com/', + AuthType: 'OAUTH2_CLIENT_CREDENTIALS', + CredentialsSecretArn: 'arn:aws:secretsmanager:eu-central-1:123456789012:secret:AmazonBedrock-auth-tW8BY1', + }, + CrawlerConfiguration: { + FilterConfiguration: { + Type: 'PATTERN', + PatternObjectFilter: { + Filters: [ + { + ObjectType: 'Campaign', + InclusionFilters: [ + 
'.*\\.pdf', + ], + ExclusionFilters: [ + '.*private.*\\.pdf', + ], + }, + { + ObjectType: 'Contract', + InclusionFilters: [ + '.*public.*\\.pdf', + ], + ExclusionFilters: [ + '.*confidential.*\\.pdf', + ], + }, + ], + }, + }, + }, + }, + }, + }); + + }); + + +}); diff --git a/test/cdk-lib/bedrock/data-sources/s3-data-source.test.ts b/test/cdk-lib/bedrock/data-sources/s3-data-source.test.ts new file mode 100644 index 00000000..204a596d --- /dev/null +++ b/test/cdk-lib/bedrock/data-sources/s3-data-source.test.ts @@ -0,0 +1,298 @@ +/** + * Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. + * + * Licensed under the Apache License, Version 2.0 (the "License"). You may not use this file except in compliance + * with the License. A copy of the License is located at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * or in the 'license' file accompanying this file. This file is distributed on an 'AS IS' BASIS, WITHOUT WARRANTIES + * OR CONDITIONS OF ANY KIND, express or implied. See the License for the specific language governing permissions + * and limitations under the License. 
+ */ + +import * as cdk from 'aws-cdk-lib'; +import { Template, Match } from 'aws-cdk-lib/assertions'; +import { FoundationModel, FoundationModelIdentifier } from 'aws-cdk-lib/aws-bedrock'; +import { Code, Function, Runtime } from 'aws-cdk-lib/aws-lambda'; +import * as s3 from 'aws-cdk-lib/aws-s3'; +import { AwsSolutionsChecks } from 'cdk-nag'; +import * as bedrock from '../../../../src/cdk-lib/bedrock'; + + +describe('S3 Data Source', () => { + let stack: cdk.Stack; + let bucket: s3.Bucket; + let kb: bedrock.KnowledgeBase; + + beforeEach(() => { + const app = new cdk.App(); + cdk.Aspects.of(app).add(new AwsSolutionsChecks()); + stack = new cdk.Stack(app, 'TestStack'); + bucket = new s3.Bucket(stack, 'TestBucket'); + kb = new bedrock.KnowledgeBase(stack, 'KB', { + embeddingsModel: bedrock.BedrockFoundationModel.TITAN_EMBED_TEXT_V2_1024, + }); + }); + + test('Method', () => { + kb.addS3DataSource({ + bucket, + dataSourceName: 'TestDataSource', + }); + + + Template.fromStack(stack).hasResourceProperties('AWS::Bedrock::DataSource', { + KnowledgeBaseId: { + 'Fn::GetAtt': [Match.anyValue(), 'KnowledgeBaseId'], + }, + Name: 'TestDataSource', + DataSourceConfiguration: { + S3Configuration: { + BucketArn: { + 'Fn::GetAtt': [Match.anyValue(), 'Arn'], + }, + }, + }, + }); + }); + + test('Default chunking', () => { + new bedrock.S3DataSource(stack, 'TestDataSource', { + bucket, + knowledgeBase: kb, + dataSourceName: 'TestDataSource', + chunkingStrategy: bedrock.ChunkingStrategy.DEFAULT, + }); + + Template.fromStack(stack).hasResourceProperties('AWS::Bedrock::DataSource', { + VectorIngestionConfiguration: + { + ChunkingConfiguration: { + ChunkingStrategy: 'FIXED_SIZE', + FixedSizeChunkingConfiguration: { + MaxTokens: 300, + OverlapPercentage: 20, + }, + }, + }, + }); + }); + + test('Fixed size chunking', () => { + new bedrock.S3DataSource(stack, 'TestDataSource', { + bucket, + knowledgeBase: kb, + dataSourceName: 'TestDataSource', + chunkingStrategy: 
bedrock.ChunkingStrategy.fixedSize({ + maxTokens: 1024, + overlapPercentage: 20, + }), + }); + + Template.fromStack(stack).hasResourceProperties('AWS::Bedrock::DataSource', { + + VectorIngestionConfiguration: + { + ChunkingConfiguration: { + ChunkingStrategy: 'FIXED_SIZE', + FixedSizeChunkingConfiguration: { + MaxTokens: 1024, + OverlapPercentage: 20, + }, + }, + }, + }); + }); + + test('No chunking', () => { + new bedrock.S3DataSource(stack, 'TestDataSource', { + bucket, + knowledgeBase: kb, + dataSourceName: 'TestDataSource', + chunkingStrategy: bedrock.ChunkingStrategy.NONE, + }); + + Template.fromStack(stack).hasResourceProperties('AWS::Bedrock::DataSource', { + VectorIngestionConfiguration: + { + ChunkingConfiguration: + { ChunkingStrategy: 'NONE' }, + }, + }); + }); + + test('Semantic chunking - default', () => { + new bedrock.S3DataSource(stack, 'TestDataSource', { + bucket, + knowledgeBase: kb, + dataSourceName: 'TestDataSource', + chunkingStrategy: bedrock.ChunkingStrategy.SEMANTIC, + }); + + Template.fromStack(stack).hasResourceProperties('AWS::Bedrock::DataSource', { + VectorIngestionConfiguration: { + ChunkingConfiguration: { + ChunkingStrategy: 'SEMANTIC', + SemanticChunkingConfiguration: { + MaxTokens: 300, + BufferSize: 0, + BreakpointPercentileThreshold: 95, + }, + }, + }, + }); + }); + + test('Semantic chunking', () => { + new bedrock.S3DataSource(stack, 'TestDataSource', { + bucket, + knowledgeBase: kb, + dataSourceName: 'TestDataSource', + chunkingStrategy: bedrock.ChunkingStrategy.semantic({ + maxTokens: 1024, + bufferSize: 1, + breakpointPercentileThreshold: 99, + }), + }); + + Template.fromStack(stack).hasResourceProperties('AWS::Bedrock::DataSource', { + VectorIngestionConfiguration: { + ChunkingConfiguration: { + ChunkingStrategy: 'SEMANTIC', + SemanticChunkingConfiguration: { + MaxTokens: 1024, + BufferSize: 1, + BreakpointPercentileThreshold: 99, + }, + }, + }, + }); + }); + + test('Hierarchical chunking - default', () => { + new 
bedrock.S3DataSource(stack, 'TestDataSource', { + bucket, + knowledgeBase: kb, + dataSourceName: 'TestDataSource', + chunkingStrategy: bedrock.ChunkingStrategy.HIERARCHICAL_TITAN, + }); + + Template.fromStack(stack).hasResourceProperties('AWS::Bedrock::DataSource', { + VectorIngestionConfiguration: { + ChunkingConfiguration: { + ChunkingStrategy: 'HIERARCHICAL', + HierarchicalChunkingConfiguration: { + LevelConfigurations: [ + { MaxTokens: 1500 }, // Parent max tokens + { MaxTokens: 300 }, // Child max tokens + ], + OverlapTokens: 60, + }, + }, + }, + }); + }); + + test('Hierarchical chunking - custom', () => { + new bedrock.S3DataSource(stack, 'TestDataSource', { + bucket, + knowledgeBase: kb, + dataSourceName: 'TestDataSource', + chunkingStrategy: bedrock.ChunkingStrategy.hierarchical({ + maxParentTokenSize: 1024, + maxChildTokenSize: 256, + overlapTokens: 30, + }), + }); + + Template.fromStack(stack).hasResourceProperties('AWS::Bedrock::DataSource', { + VectorIngestionConfiguration: { + ChunkingConfiguration: { + ChunkingStrategy: 'HIERARCHICAL', + HierarchicalChunkingConfiguration: { + LevelConfigurations: [ + { MaxTokens: 1024 }, // Parent max tokens + { MaxTokens: 256 }, // Child max tokens + ], + OverlapTokens: 30, + }, + }, + }, + }); + }); + + test('FM parsing', () => { + new bedrock.S3DataSource(stack, 'TestDataSource', { + bucket, + knowledgeBase: kb, + dataSourceName: 'TestDataSource', + parsingStrategy: bedrock.ParsingStategy.foundationModel({ + parsingModel: FoundationModel.fromFoundationModelId(stack, 'model', + FoundationModelIdentifier.ANTHROPIC_CLAUDE_3_SONNET_20240229_V1_0, + ), + }), + }); + + Template.fromStack(stack).hasResourceProperties('AWS::Bedrock::DataSource', { + VectorIngestionConfiguration: { + ParsingConfiguration: { + ParsingStrategy: 'BEDROCK_FOUNDATION_MODEL', + BedrockFoundationModelConfiguration: { + ModelArn: Match.anyValue(), + ParsingPrompt: { + ParsingPromptText: Match.stringLikeRegexp('Transcribe the text content.*'), + }, 
+ }, + }, + }, + }); + }); + + test('Lambda Transformation', () => { + // WHEN + const bucket2 = new s3.Bucket(stack, 'mybucket', { + bucketName: 'mybucketname', + }); + const lambdaFunction = new Function(stack, 'myFunction', { + code: Code.fromInline('exports.handler = function(event, ctx, cb) { return cb(null, "hello"); }'), + handler: 'index.handler', + runtime: Runtime.PYTHON_3_11, + }); + new bedrock.S3DataSource(stack, 'TestDataSource', { + bucket, + knowledgeBase: kb, + dataSourceName: 'TestDataSource', + customTransformation: bedrock.CustomTransformation.lambda({ + lambdaFunction, + s3BucketUri: `s3://${bucket2.bucketName}/chunkprocessor`, + }), + }); + + // THEN + Template.fromStack(stack).hasResourceProperties('AWS::Bedrock::DataSource', { + VectorIngestionConfiguration: { + CustomTransformationConfiguration: { + Transformations: [{ + StepToApply: 'POST_CHUNKING', + TransformationFunction: { + TransformationLambdaConfiguration: { + LambdaArn: { + 'Fn::GetAtt': [Match.anyValue(), 'Arn'], + }, + }, + }, + }], + IntermediateStorage: { + S3Location: { + URI: Match.anyValue(), + }, + }, + }, + }, + }); + + }); + + +}); \ No newline at end of file diff --git a/test/cdk-lib/bedrock/integ-tests/prompts.integ.ts b/test/cdk-lib/bedrock/integ-tests/prompts.integ.ts deleted file mode 100644 index e8cde533..00000000 --- a/test/cdk-lib/bedrock/integ-tests/prompts.integ.ts +++ /dev/null @@ -1,81 +0,0 @@ -/** - * Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. - * - * Licensed under the Apache License, Version 2.0 (the "License"). You may not use this file except in compliance - * with the License. A copy of the License is located at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * or in the 'license' file accompanying this file. This file is distributed on an 'AS IS' BASIS, WITHOUT WARRANTIES - * OR CONDITIONS OF ANY KIND, express or implied. 
See the License for the specific language governing permissions - * and limitations under the License. - */ -import * as integ from '@aws-cdk/integ-tests-alpha'; -import * as cdk from 'aws-cdk-lib'; -import { aws_bedrock as cdk_bedrock } from 'aws-cdk-lib'; -import * as kms from 'aws-cdk-lib/aws-kms'; -import { Prompt, PromptVariant } from '../../../../src/cdk-lib/bedrock/prompt'; - -const app = new cdk.App(); -const stack = new cdk.Stack(app, 'aws-cdk-bedrock-prompts-integ-test'); - -const cmk = new kms.Key(stack, 'cmk', {}); -const claudeModel = cdk_bedrock.FoundationModel.fromFoundationModelId(stack, 'model1', cdk_bedrock.FoundationModelIdentifier.ANTHROPIC_CLAUDE_3_SONNET_20240229_V1_0); -const variant1 = PromptVariant.text({ - variantName: 'variant1', - model: claudeModel, - templateConfiguration: { - inputVariables: [{ name: 'topic' }], - text: 'This is my first text prompt. Please summarize our conversation on: {{topic}}.', - }, - inferenceConfiguration: { - temperature: 1.0, - topP: 0.999, - maxTokens: 2000, - topK: 250, - }, -}); - -const prompt1 = new Prompt(stack, 'prompt1', { - promptName: 'prompt1', - description: 'my first prompt', - defaultVariant: variant1, - variants: [variant1], - encryptionKey: cmk, -}); - -const variant2 = PromptVariant.text({ - variantName: 'variant2', - model: claudeModel, - templateConfiguration: { - inputVariables: [{ name: 'topic' }], - text: 'This is my second text prompt. 
Please summarize our conversation on: {{topic}}.', - }, - inferenceConfiguration: { - temperature: 0.5, - topP: 0.999, - maxTokens: 2000, - topK: 250, - }, -}); - -prompt1.addVariant(variant2); -prompt1.createVersion('my first version'); - -// const integ_case = -new integ.IntegTest(app, 'ServiceTest', { - testCases: [stack], - cdkCommandOptions: { - destroy: { - args: { - force: true, - }, - }, - }, -}); - -// integ_case.assertions.awsApiCall('bedrock-agent', 'GetPrompt', { -// promptIdentifier: prompt1.promptArn -// }) - -app.synth(); diff --git a/test/cdk-lib/bedrock/knowledge-base.test.ts b/test/cdk-lib/bedrock/knowledge-base.test.ts index 72d9f959..8dba8581 100644 --- a/test/cdk-lib/bedrock/knowledge-base.test.ts +++ b/test/cdk-lib/bedrock/knowledge-base.test.ts @@ -11,9 +11,11 @@ * and limitations under the License. */ -import { ABSENT, expect as cdkExpect, haveResourceLike } from '@aws-cdk/assert'; +import { ABSENT, expect as cdkExpect, haveResource, haveResourceLike } from '@aws-cdk/assert'; import * as cdk from 'aws-cdk-lib'; +import { aws_s3 as s3 } from 'aws-cdk-lib'; import { Annotations, Match, Template } from 'aws-cdk-lib/assertions'; + import { AwsSolutionsChecks } from 'cdk-nag'; import { AmazonAuroraDefaultVectorStore, @@ -118,17 +120,51 @@ describe('KnowledgeBase', () => { test('Should correctly initialize role with necessary permissions', () => { const vectorStore = new VectorCollection(stack, 'VectorCollection2'); const model = BedrockFoundationModel.TITAN_EMBED_TEXT_V1; - const knowledgeBase = new KnowledgeBase(stack, 'VectorKnowledgeBase2', { + new KnowledgeBase(stack, 'VectorKnowledgeBase2', { embeddingsModel: model, vectorStore: vectorStore, }); - const policyDocument = knowledgeBase.role.assumeRolePolicy?.toJSON(); - expect(policyDocument).toBeDefined(); - expect(policyDocument.Statement).toHaveLength(2); - expect(policyDocument.Statement[0].Action).toContain('sts:AssumeRole'); - 
expect(policyDocument.Statement[0].Principal).toHaveProperty('Service'); - expect(policyDocument.Statement[0].Principal.Service).toContain('bedrock.amazonaws.com'); + cdkExpect(stack).to( + haveResourceLike('AWS::IAM::Role', { + AssumeRolePolicyDocument: { + Statement: [ + { + Effect: 'Allow', + Principal: { + Service: 'bedrock.amazonaws.com', + }, + Action: 'sts:AssumeRole', + Condition: { + StringEquals: { + 'aws:SourceAccount': '123456789012', + }, + ArnLike: { + 'aws:SourceArn': { + 'Fn::Join': [ + '', + [ + 'arn:', + { + Ref: 'AWS::Partition', + }, + ':bedrock:us-east-1:123456789012:knowledge-base/*', + ], + ], + }, + }, + }, + }, + ], + }, + }), + ); + // const policyDocument = knowledgeBase.role.?.toJSON(); + // expect(policyDocument).toBeDefined(); + // expect(policyDocument.Statement).toHaveLength(2); + // expect(policyDocument.Statement[0].Action).toContain('sts:AssumeRole'); + // expect(policyDocument.Statement[0].Principal).toHaveProperty('Service'); + // expect(policyDocument.Statement[0].Principal.Service).toContain('bedrock.amazonaws.com'); }); test('Should throw error when vectorStore is not VectorCollection and indexName is provided', () => { @@ -252,4 +288,48 @@ describe('KnowledgeBase', () => { ); }); + test('Imported Knowledge Base', () => { + const kb = KnowledgeBase.fromKnowledgeBaseAttributes(stack, 'ImportedKnowledgeBase', { + knowledgeBaseId: 'OVGH4TEBDH', + executionRoleArn: 'arn:aws:iam::123456789012:role/AmazonBedrockExecutionRoleForKnowledgeBaseawscdkbdgeBaseE9B1DDDC', + }); + + expect(kb.knowledgeBaseId).toEqual('OVGH4TEBDH'); + expect(kb.role.roleArn).toEqual('arn:aws:iam::123456789012:role/AmazonBedrockExecutionRoleForKnowledgeBaseawscdkbdgeBaseE9B1DDDC'); + expect(kb.role.roleName).toEqual('AmazonBedrockExecutionRoleForKnowledgeBaseawscdkbdgeBaseE9B1DDDC'); + expect(kb.knowledgeBaseArn).toMatch(new RegExp('arn:.*:bedrock:us-east-1:123456789012:knowledge-base\/OVGH4TEBDH$')); + + }); + + test('Imported Knowledge Base - Data Source 
Method', () => { + const kb2 = KnowledgeBase.fromKnowledgeBaseAttributes(stack, 'ImportedKnowledgeBase2', { + knowledgeBaseId: 'OVGH4TEBDH', + executionRoleArn: 'arn:aws:iam::123456789012:role/service-role/AmazonBedrockExecutionRoleForKnowledgeBase_9ivh2', + }); + const bucket = s3.Bucket.fromBucketArn(stack, 's3-imported', + 'arn:aws:s3:::aws-cdk-bedrock-test-bucket-83908e77-cdxrc7lilg6v', + ); + + const s3datasource = kb2.addS3DataSource({ + bucket, + dataSourceName: 'TestDataSourceS3', + }); + + expect(s3datasource.dataSourceType).toEqual('S3'); + expect(s3datasource.knowledgeBase.knowledgeBaseId).toEqual('OVGH4TEBDH'); + cdkExpect(stack).to(haveResource('AWS::Bedrock::DataSource')); + + // console.log(Template.fromStack(stack).toJSON) + + // Template.fromStack(stack).hasResourceProperties('AWS::Bedrock::DataSource', { + // KnowledgeBaseId: 'OVGH4TEBDH', + // Name: 'TestDataSourceS3', + // DataSourceConfiguration: { + // S3Configuration: { + // BucketArn: 'arn:aws:s3:::aws-cdk-bedrock-test-bucket-83908e77-cdxrc7lilg6v' + // }, + // }, + // }); + }); + }); \ No newline at end of file diff --git a/test/cdk-lib/bedrock/s3-data-source.test.ts b/test/cdk-lib/bedrock/s3-data-source.test.ts deleted file mode 100644 index 0eb8801f..00000000 --- a/test/cdk-lib/bedrock/s3-data-source.test.ts +++ /dev/null @@ -1,115 +0,0 @@ -/** - * Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. - * - * Licensed under the Apache License, Version 2.0 (the "License"). You may not use this file except in compliance - * with the License. A copy of the License is located at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * or in the 'license' file accompanying this file. This file is distributed on an 'AS IS' BASIS, WITHOUT WARRANTIES - * OR CONDITIONS OF ANY KIND, express or implied. See the License for the specific language governing permissions - * and limitations under the License. 
- */ - -import * as cdk from 'aws-cdk-lib'; -import { Template } from 'aws-cdk-lib/assertions'; -import * as s3 from 'aws-cdk-lib/aws-s3'; -import { AwsSolutionsChecks } from 'cdk-nag'; -import * as bedrock from '../../../src/cdk-lib/bedrock'; - -// mock lambda.Code.fromDockerBuild() -jest.mock('aws-cdk-lib/aws-lambda', () => { - const actualLambda = jest.requireActual('aws-cdk-lib/aws-lambda'); - return { - ...actualLambda, - Code: { - ...actualLambda.Code, - fromDockerBuild: jest.fn(() => actualLambda.Code.fromInline('mockCode')), - fromAsset: jest.fn(() => actualLambda.Code.fromInline('mockCode')), - }, - }; -}); - -describe('S3 Data Source', () => { - let stack: cdk.Stack; - let bucket: s3.Bucket; - let kb: bedrock.KnowledgeBase; - - beforeEach(() => { - const app = new cdk.App(); - cdk.Aspects.of(app).add(new AwsSolutionsChecks()); - stack = new cdk.Stack(app, 'TestStack'); - bucket = new s3.Bucket(stack, 'TestBucket'); - kb = new bedrock.KnowledgeBase(stack, 'KB', { - embeddingsModel: bedrock.BedrockFoundationModel.TITAN_EMBED_TEXT_V1, - }); - }); - - test('Fixed size chunking', () => { - new bedrock.S3DataSource(stack, 'TestDataSource', { - bucket, - knowledgeBase: kb, - dataSourceName: 'TestDataSource', - chunkingStrategy: bedrock.ChunkingStrategy.FIXED_SIZE, - maxTokens: 1024, - overlapPercentage: 20, - }); - - Template.fromStack(stack).hasResourceProperties('AWS::Bedrock::DataSource', { - - VectorIngestionConfiguration: - { - ChunkingConfiguration: { - ChunkingStrategy: 'FIXED_SIZE', - FixedSizeChunkingConfiguration: { - MaxTokens: 1024, - OverlapPercentage: 20, - }, - }, - }, - }); - }); - - test('Default chunking', () => { - new bedrock.S3DataSource(stack, 'TestDataSource', { - bucket, - knowledgeBase: kb, - dataSourceName: 'TestDataSource', - chunkingStrategy: bedrock.ChunkingStrategy.DEFAULT, - maxTokens: 1024, - overlapPercentage: 20, - }); - - Template.fromStack(stack).hasResourceProperties('AWS::Bedrock::DataSource', { - - 
VectorIngestionConfiguration: - { - ChunkingConfiguration: { - ChunkingStrategy: 'FIXED_SIZE', - FixedSizeChunkingConfiguration: { - MaxTokens: 300, - OverlapPercentage: 20, - }, - }, - }, - }); - }); - - test('No chunking', () => { - new bedrock.S3DataSource(stack, 'TestDataSource', { - bucket, - knowledgeBase: kb, - dataSourceName: 'TestDataSource', - chunkingStrategy: bedrock.ChunkingStrategy.NONE, - }); - - Template.fromStack(stack).hasResourceProperties('AWS::Bedrock::DataSource', { - VectorIngestionConfiguration: - { - ChunkingConfiguration: - { ChunkingStrategy: 'NONE' }, - }, - }); - }); - -}); \ No newline at end of file