From b2927e6209c5520a21004164e19515cbbd868421 Mon Sep 17 00:00:00 2001 From: LiranCohen Date: Tue, 15 Aug 2023 18:08:28 -0400 Subject: [PATCH] Data below a threshold should not be stored in DataStore. (#456) * add encodedData field to RecordsWrite in message storage under a threshold In this commit I've added utilized the existing RecordsWriteMessageWithOptionalEncodedData class to leverage writing data from RecordsWrite messages directly into the MessageStore when the data is below the 50k byte threshold. For now I'm still storing in DataStore, but that will be removed in subsequent commits. I had to take care and remove `encodedData` from messages when computing the CID and made sure to use the Message.getCid() helper for computing CIDs for message storage. Additionally, I've made sure to prune the original RecordsWrite of any encodedData when cleanup happens. * do not store data smaller tahn a threshold in the DataStore In this commit I've built on the previous one by not storing data larger than the treshold within the DataStore and keepign it only embedded within the MessageStore. I've imicked the sareguards and errors from putData into processEncodedData taking into account encodedData. I've also implemented tests for various data sizes in certain places where I noticed it may matter. I'm missing some additional tests that I will write in subsequent commits. * added tests for MessagesGet and for Message getCid * fixed broken tests that weren't running, added tests to check paths between putData and processEncodedData * fix tests that weren't running * more coverage for records-read * be explicit when testing for thresholds * refactor validation of data * remove backward compatibile code, update tests * remove unneeded tests for backward compatibilty * remove storage controller query method, replace it with messageStore querymethod * remove unneeded comment about storage controller from * comments and variables for clarity * comments for further clarity around encodedData * readme update after rebase --- README.md | 2 +- src/core/dwn-constant.ts | 3 +- src/core/dwn-error.ts | 3 +- src/core/message.ts | 9 +- src/handlers/messages-get.ts | 27 +- src/handlers/records-query.ts | 11 +- src/handlers/records-read.ts | 29 +- src/handlers/records-write.ts | 150 +++++++--- src/store/message-store-level.ts | 9 +- src/store/storage-controller.ts | 39 +-- tests/core/message.spec.ts | 16 ++ tests/handlers/messages-get.spec.ts | 6 +- tests/handlers/records-query.spec.ts | 8 +- tests/handlers/records-read.spec.ts | 70 ++++- tests/handlers/records-write.spec.ts | 413 +++++++++++++++++++++++++-- 15 files changed, 640 insertions(+), 155 deletions(-) diff --git a/README.md b/README.md index 534a3c119..64719e205 100644 --- a/README.md +++ b/README.md @@ -3,7 +3,7 @@ # Decentralized Web Node (DWN) SDK Code Coverage -![Statements](https://img.shields.io/badge/statements-97.51%25-brightgreen.svg?style=flat) ![Branches](https://img.shields.io/badge/branches-94.35%25-brightgreen.svg?style=flat) ![Functions](https://img.shields.io/badge/functions-93.86%25-brightgreen.svg?style=flat) ![Lines](https://img.shields.io/badge/lines-97.51%25-brightgreen.svg?style=flat) +![Statements](https://img.shields.io/badge/statements-97.53%25-brightgreen.svg?style=flat) ![Branches](https://img.shields.io/badge/branches-94.42%25-brightgreen.svg?style=flat) ![Functions](https://img.shields.io/badge/functions-93.88%25-brightgreen.svg?style=flat) ![Lines](https://img.shields.io/badge/lines-97.53%25-brightgreen.svg?style=flat) - [Introduction](#introduction) diff --git a/src/core/dwn-constant.ts b/src/core/dwn-constant.ts index 48cdc3f99..1223f0093 100644 --- a/src/core/dwn-constant.ts +++ b/src/core/dwn-constant.ts @@ -1,6 +1,7 @@ export class DwnConstant { /** * The maximum size in bytes of raw data that will be returned as `encodedData`. + * this is also the maximum size that we will store within MessageStore. */ - public static readonly maxDataSizeAllowedToBeEncoded = 10_000; + public static readonly maxDataSizeAllowedToBeEncoded = 50_000; } \ No newline at end of file diff --git a/src/core/dwn-error.ts b/src/core/dwn-error.ts index 860f8efa6..b893ef2c3 100644 --- a/src/core/dwn-error.ts +++ b/src/core/dwn-error.ts @@ -48,7 +48,8 @@ export enum DwnErrorCode { RecordsWriteDataCidMismatch = 'RecordsWriteDataCidMismatch', RecordsWriteDataSizeMismatch = 'RecordsWriteDataSizeMismatch', RecordsWriteMissingAuthorizationSignatureInput = 'RecordsWriteMissingAuthorizationSignatureInput', - RecordsWriteMissingData = 'RecordsWriterMissingData', + RecordsWriteMissingDataInPrevious = 'RecordsWriteMissingDataInPrevious', + RecordsWriteMissingDataAssociation = 'RecordsWriteMissingDataAssociation', RecordsWriteMissingDataStream = 'RecordsWriteMissingDataStream', RecordsWriteMissingProtocol = 'RecordsWriteMissingProtocol', RecordsWriteMissingSchema = 'RecordsWriteMissingSchema', diff --git a/src/core/message.ts b/src/core/message.ts index 921f9e750..6b29fcd52 100644 --- a/src/core/message.ts +++ b/src/core/message.ts @@ -86,7 +86,14 @@ export abstract class Message { // the message will contain properties that should not be part of the CID computation // and we need to strip them out (like `encodedData` that we historically had for a long time), // but we can remove this method entirely if the code becomes stable and it is apparent that the wrapper is not needed - const cid = await Cid.computeCid(message); + + // ^--- seems like we might need to keep this around for now. + const rawMessage = { ...message } as any; + if (rawMessage.encodedData) { + delete rawMessage.encodedData; + } + + const cid = await Cid.computeCid(rawMessage as GenericMessage); return cid; } diff --git a/src/handlers/messages-get.ts b/src/handlers/messages-get.ts index f4e896e6c..90ebd0481 100644 --- a/src/handlers/messages-get.ts +++ b/src/handlers/messages-get.ts @@ -2,16 +2,13 @@ import type { DataStore } from '../types/data-store.js'; import type { DidResolver } from '../did/did-resolver.js'; import type { MessageStore } from '../types/message-store.js'; import type { MethodHandler } from '../types/method-handler.js'; -import type { RecordsWriteMessage } from '../types/records-types.js'; +import type { RecordsWriteMessageWithOptionalEncodedData } from '../store/storage-controller.js'; import type { MessagesGetMessage, MessagesGetReply, MessagesGetReplyEntry } from '../types/messages-types.js'; -import { DataStream } from '../utils/data-stream.js'; -import { DwnConstant } from '../core/dwn-constant.js'; -import { Encoder } from '../utils/encoder.js'; import { messageReplyFromError } from '../core/message-reply.js'; import { MessagesGet } from '../interfaces/messages-get.js'; import { authenticate, authorize } from '../core/auth.js'; -import { DwnInterfaceName, DwnMethodName, Message } from '../core/message.js'; +import { DwnInterfaceName, DwnMethodName } from '../core/message.js'; type HandleArgs = { tenant: string, message: MessagesGetMessage }; @@ -54,7 +51,6 @@ export class MessagesGetHandler implements MethodHandler { // for every message, include associated data as `encodedData` IF: // * its a RecordsWrite // * the data size is equal or smaller than the size threshold - //! NOTE: this is somewhat duplicate code that also exists in `StorageController.query`. for (const entry of messages) { const { message } = entry; @@ -67,19 +63,12 @@ export class MessagesGetHandler implements MethodHandler { continue; } - // RecordsWrite specific handling - const recordsWrite = message as RecordsWriteMessage; - const dataCid = recordsWrite.descriptor.dataCid; - const dataSize = recordsWrite.descriptor.dataSize; - - if (dataCid !== undefined && dataSize! <= DwnConstant.maxDataSizeAllowedToBeEncoded) { - const messageCid = await Message.getCid(message); - const result = await this.dataStore.get(tenant, messageCid, dataCid); - - if (result) { - const dataBytes = await DataStream.toBytes(result.dataStream); - entry.encodedData = Encoder.bytesToBase64Url(dataBytes); - } + // RecordsWrite specific handling, if MessageStore has embedded `encodedData` return it with the entry. + // we store `encodedData` along with the message if the data is below a certain threshold. + const recordsWrite = message as RecordsWriteMessageWithOptionalEncodedData; + if (recordsWrite.encodedData !== undefined) { + entry.encodedData = recordsWrite.encodedData; + delete recordsWrite.encodedData; } } diff --git a/src/handlers/records-query.ts b/src/handlers/records-query.ts index 2a72ef2db..41bdf4f59 100644 --- a/src/handlers/records-query.ts +++ b/src/handlers/records-query.ts @@ -6,7 +6,6 @@ import type { RecordsQueryMessage, RecordsQueryReply, RecordsQueryReplyEntry, Re import { authenticate } from '../core/auth.js'; import { lexicographicalCompare } from '../utils/string.js'; import { messageReplyFromError } from '../core/message-reply.js'; -import { StorageController } from '../store/storage-controller.js'; import { DateSort, RecordsQuery } from '../interfaces/records-query.js'; import { DwnInterfaceName, DwnMethodName } from '../core/message.js'; @@ -86,7 +85,8 @@ export class RecordsQueryHandler implements MethodHandler { method : DwnMethodName.Write, isLatestBaseState : true }; - const records = await StorageController.query(this.messageStore, this.dataStore, tenant, filter); + + const records = (await this.messageStore.query(tenant, filter)) as RecordsWriteMessageWithOptionalEncodedData[]; return records; } @@ -133,7 +133,7 @@ export class RecordsQueryHandler implements MethodHandler { published : true, isLatestBaseState : true }; - const publishedRecords = await StorageController.query(this.messageStore, this.dataStore, tenant, filter); + const publishedRecords = (await this.messageStore.query(tenant, filter)) as RecordsWriteMessageWithOptionalEncodedData[]; return publishedRecords; } @@ -152,8 +152,7 @@ export class RecordsQueryHandler implements MethodHandler { isLatestBaseState : true, published : false }; - const unpublishedRecordsForQueryAuthor = await StorageController.query(this.messageStore, this.dataStore, tenant, filter); - + const unpublishedRecordsForQueryAuthor = (await this.messageStore.query(tenant, filter)) as RecordsWriteMessageWithOptionalEncodedData[]; return unpublishedRecordsForQueryAuthor; } @@ -172,7 +171,7 @@ export class RecordsQueryHandler implements MethodHandler { isLatestBaseState : true, published : false }; - const unpublishedRecordsForQueryAuthor = await StorageController.query(this.messageStore, this.dataStore, tenant, filter); + const unpublishedRecordsForQueryAuthor = (await this.messageStore.query(tenant, filter)) as RecordsWriteMessageWithOptionalEncodedData[]; return unpublishedRecordsForQueryAuthor; } } diff --git a/src/handlers/records-read.ts b/src/handlers/records-read.ts index 7bff63acd..2bac998cf 100644 --- a/src/handlers/records-read.ts +++ b/src/handlers/records-read.ts @@ -1,13 +1,15 @@ import type { MethodHandler } from '../types/method-handler.js'; +import type { RecordsWriteMessageWithOptionalEncodedData } from '../store/storage-controller.js'; import type { TimestampedMessage } from '../types/message-types.js'; import type { DataStore, DidResolver, MessageStore } from '../index.js'; -import type { RecordsReadMessage, RecordsReadReply, RecordsWriteMessage } from '../types/records-types.js'; +import type { RecordsReadMessage, RecordsReadReply } from '../types/records-types.js'; import { authenticate } from '../core/auth.js'; import { Message } from '../core/message.js'; import { messageReplyFromError } from '../core/message-reply.js'; import { RecordsRead } from '../interfaces/records-read.js'; import { RecordsWrite } from '../interfaces/records-write.js'; +import { DataStream, Encoder } from '../index.js'; import { DwnInterfaceName, DwnMethodName } from '../core/message.js'; export class RecordsReadHandler implements MethodHandler { @@ -51,20 +53,27 @@ export class RecordsReadHandler implements MethodHandler { }; } - const newestRecordsWrite = newestExistingMessage as RecordsWriteMessage; + const newestRecordsWrite = newestExistingMessage as RecordsWriteMessageWithOptionalEncodedData; try { await recordsRead.authorize(tenant, await RecordsWrite.parse(newestRecordsWrite), this.messageStore); } catch (error) { return messageReplyFromError(error, 401); } - const messageCid = await Message.getCid(newestRecordsWrite); - const result = await this.dataStore.get(tenant, messageCid, newestRecordsWrite.descriptor.dataCid); - - if (result?.dataStream === undefined) { - return { - status: { code: 404, detail: 'Not Found' } - }; + let data; + if (newestRecordsWrite.encodedData !== undefined) { + const dataBytes = Encoder.base64UrlToBytes(newestRecordsWrite.encodedData); + data = DataStream.fromBytes(dataBytes); + delete newestRecordsWrite.encodedData; + } else { + const messageCid = await Message.getCid(newestRecordsWrite); + const result = await this.dataStore.get(tenant, messageCid, newestRecordsWrite.descriptor.dataCid); + if (result?.dataStream === undefined) { + return { + status: { code: 404, detail: 'Not Found' } + }; + } + data = result.dataStream; } const { authorization: _, ...recordsWriteWithoutAuthorization } = newestRecordsWrite; // a trick to stripping away `authorization` @@ -72,7 +81,7 @@ export class RecordsReadHandler implements MethodHandler { status : { code: 200, detail: 'OK' }, record : { ...recordsWriteWithoutAuthorization, - data: result.dataStream + data, } }; return messageReply; diff --git a/src/handlers/records-write.ts b/src/handlers/records-write.ts index 57108d3fa..5306b3663 100644 --- a/src/handlers/records-write.ts +++ b/src/handlers/records-write.ts @@ -1,6 +1,7 @@ import type { EventLog } from '../types/event-log.js'; import type { GenericMessageReply } from '../core/message-reply.js'; import type { MethodHandler } from '../types/method-handler.js'; +import type { RecordsWriteMessageWithOptionalEncodedData } from '../store/storage-controller.js'; import type { DataStore, DidResolver, MessageStore } from '../index.js'; import type { RecordsDeleteMessage, RecordsWriteMessage } from '../types/records-types.js'; @@ -8,6 +9,7 @@ import { authenticate } from '../core/auth.js'; import { messageReplyFromError } from '../core/message-reply.js'; import { RecordsWrite } from '../interfaces/records-write.js'; import { StorageController } from '../store/storage-controller.js'; +import { Cid, DataStream, DwnConstant, Encoder } from '../index.js'; import { DwnError, DwnErrorCode } from '../core/dwn-error.js'; import { DwnInterfaceName, DwnMethodName, Message } from '../core/message.js'; @@ -80,25 +82,42 @@ export class RecordsWriteHandler implements MethodHandler { const isLatestBaseState = true; const indexes = await constructRecordsWriteIndexes(recordsWrite, isLatestBaseState); - try { - // try to store data, unless options explicitly say to skip storage - if (options === undefined || !options.skipDataStorage) { - await this.putData(tenant, message, dataStream, newestExistingMessage as (RecordsWriteMessage|RecordsDeleteMessage) | undefined); - } - } catch (error) { - const e = error as any; - if (e.code === DwnErrorCode.RecordsWriteMissingDataStream || - e.code === DwnErrorCode.RecordsWriteMissingData || - e.code === DwnErrorCode.RecordsWriteDataCidMismatch || - e.code === DwnErrorCode.RecordsWriteDataSizeMismatch) { - return messageReplyFromError(error, 400); + // if data is below a certain threshold, we embed the data directly into the message for storage in MessageStore. + let messageWithOptionalEncodedData: RecordsWriteMessageWithOptionalEncodedData = message; + + // try to store data, unless options explicitly say to skip storage + if (options === undefined || !options.skipDataStorage) { + if (dataStream === undefined && newestExistingMessage?.descriptor.method === DwnMethodName.Delete) { + return messageReplyFromError(new DwnError(DwnErrorCode.RecordsWriteMissingDataStream, 'No data stream was provided with the previous message being a delete'), 400); } - // else throw - throw error; + try { + // if data is below the threshold, we store it within MessageStore + if (message.descriptor.dataSize <= DwnConstant.maxDataSizeAllowedToBeEncoded) { + // processes and sets `encodedData` with appropriate data. + messageWithOptionalEncodedData = await this.processEncodedData( + message, + dataStream, + newestExistingMessage as (RecordsWriteMessage|RecordsDeleteMessage) | undefined + ); + } else { + await this.putData(tenant, message, dataStream); + } + } catch (error) { + const e = error as any; + if (e.code === DwnErrorCode.RecordsWriteMissingDataInPrevious || + e.code === DwnErrorCode.RecordsWriteMissingDataAssociation || + e.code === DwnErrorCode.RecordsWriteDataCidMismatch || + e.code === DwnErrorCode.RecordsWriteDataSizeMismatch) { + return messageReplyFromError(error, 400); + } + + // else throw + throw error; + } } - await this.messageStore.put(tenant, message, indexes); + await this.messageStore.put(tenant, messageWithOptionalEncodedData, indexes); await this.eventLog.append(tenant, await Message.getCid(message)); const messageReply = { @@ -113,13 +132,52 @@ export class RecordsWriteHandler implements MethodHandler { return messageReply; }; + /** + * Embeds the record's data into the `encodedData` property. + * If dataStream is present, it uses the dataStream. Otherwise, uses the `encodedData` from the most recent RecordsWrite. + * + * @returns {RecordsWriteMessageWithOptionalEncodedData} `encodedData` embedded. + * + * @throws {DwnError} with `DwnErrorCode.RecordsWriteMissingDataInPrevious` + * if `dataStream` is absent AND `encodedData` of previous message is missing + * @throws {DwnError} with `DwnErrorCode.RecordsWriteDataCidMismatch` + * if the data stream resulted in a data CID that mismatches with `dataCid` in the given message + * @throws {DwnError} with `DwnErrorCode.RecordsWriteDataSizeMismatch` + * if `dataSize` in `descriptor` given mismatches the actual data size + */ + public async processEncodedData( + message: RecordsWriteMessage, + dataStream?: _Readable.Readable, + newestExistingMessage?: RecordsWriteMessage | RecordsDeleteMessage + ): Promise { + let dataBytes; + if (dataStream === undefined) { + const newestWithData = newestExistingMessage as RecordsWriteMessageWithOptionalEncodedData | undefined; + if (newestWithData?.encodedData === undefined) { + throw new DwnError( + DwnErrorCode.RecordsWriteMissingDataInPrevious, + `No dataStream was provided and unable to get data from previous message` + ); + } else { + dataBytes = Encoder.base64UrlToBytes(newestWithData.encodedData); + } + } else { + dataBytes = await DataStream.toBytes(dataStream); + } + + const dataCid = await Cid.computeDagPbCidFromBytes(dataBytes); + RecordsWriteHandler.validateDataIntegrity(message.descriptor.dataCid, message.descriptor.dataSize, dataCid, dataBytes.length); + + const recordsWrite: RecordsWriteMessageWithOptionalEncodedData = { ...message }; + recordsWrite.encodedData = Encoder.bytesToBase64Url(dataBytes); + return recordsWrite; + } + /** * Puts the given data in storage unless tenant already has that data for the given recordId * - * @throws {DwnError} with `DwnErrorCode.RecordsWriteMissingDataStream` - * if `dataStream` is absent AND the `dataCid` does not match the current data for the given recordId - * @throws {DwnError} with `DwnErrorCode.RecordsWriteMissingData` - * if `dataStream` is absent AND dataStore does not contain the given `dataCid` + * @throws {DwnError} with `DwnErrorCode.RecordsWriteMissingDataAssociation` + * if `dataStream` is absent AND unable to associate data given `dataCid` * @throws {DwnError} with `DwnErrorCode.RecordsWriteDataCidMismatch` * if the data stream resulted in a data CID that mismatches with `dataCid` in the given message * @throws {DwnError} with `DwnErrorCode.RecordsWriteDataSizeMismatch` @@ -129,54 +187,58 @@ export class RecordsWriteHandler implements MethodHandler { tenant: string, message: RecordsWriteMessage, dataStream?: _Readable.Readable, - newestExistingMessage?: RecordsWriteMessage | RecordsDeleteMessage ): Promise { let result: { dataCid: string, dataSize: number }; const messageCid = await Message.getCid(message); if (dataStream === undefined) { - // dataStream must be included if message contains a new dataCid - if (newestExistingMessage?.descriptor.method === DwnMethodName.Delete || - newestExistingMessage?.descriptor.dataCid !== message.descriptor.dataCid) { - throw new DwnError( - DwnErrorCode.RecordsWriteMissingDataStream, - 'Data stream is not provided.' - ); - } - const associateResult = await this.dataStore.associate(tenant, messageCid, message.descriptor.dataCid); if (associateResult === undefined) { - throw new DwnError(DwnErrorCode.RecordsWriteMissingData, `Unable to associate dataCid ${message.descriptor.dataCid} ` + + throw new DwnError(DwnErrorCode.RecordsWriteMissingDataAssociation, `Unable to associate dataCid ${message.descriptor.dataCid} ` + `to messageCid ${messageCid} because dataStream was not provided and data was not found in dataStore`); } - result = associateResult; } else { result = await this.dataStore.put(tenant, messageCid, message.descriptor.dataCid, dataStream); } - // verify that given dataSize matches size of actual data - if (message.descriptor.dataSize !== result.dataSize) { - // there is an opportunity to improve here: handle the edge case of if the delete fails... + try { + RecordsWriteHandler.validateDataIntegrity(message.descriptor.dataCid, message.descriptor.dataSize, result.dataCid, result.dataSize); + } catch (error) { + // delete data and throw error to caller await this.dataStore.delete(tenant, messageCid, message.descriptor.dataCid); + throw error; + } + } + /** + * Validates the expected `dataCid` and `dataSize` in the descriptor vs the received data. + * + * @throws {DwnError} with `DwnErrorCode.RecordsWriteDataCidMismatch` + * if the data stream resulted in a data CID that mismatches with `dataCid` in the given message + * @throws {DwnError} with `DwnErrorCode.RecordsWriteDataSizeMismatch` + * if `dataSize` in `descriptor` given mismatches the actual data size + */ + static validateDataIntegrity( + expectedDataCid: string, + expectedDataSize: number, + actualDataCid: string, + actualDataSize: number + ): void { + if (expectedDataCid !== actualDataCid) { throw new DwnError( - DwnErrorCode.RecordsWriteDataSizeMismatch, - `actual data size ${result.dataSize} bytes does not match dataSize in descriptor: ${message.descriptor.dataSize}` + DwnErrorCode.RecordsWriteDataCidMismatch, + `actual data CID ${actualDataCid} does not match dataCid in descriptor: ${expectedDataCid}` ); } - - // verify that given dataCid matches CID of actual data - if (message.descriptor.dataCid !== result.dataCid) { - // there is an opportunity to improve here: handle the edge cae of if the delete fails... - await this.dataStore.delete(tenant, messageCid, message.descriptor.dataCid); - + if (expectedDataSize !== actualDataSize) { throw new DwnError( - DwnErrorCode.RecordsWriteDataCidMismatch, - `actual data CID ${result.dataCid} does not match dataCid in descriptor: ${message.descriptor.dataCid}` + DwnErrorCode.RecordsWriteDataSizeMismatch, + `actual data size ${actualDataSize} bytes does not match dataSize in descriptor: ${expectedDataSize}` ); } } + } export async function constructRecordsWriteIndexes( diff --git a/src/store/message-store-level.ts b/src/store/message-store-level.ts index b14fe8811..5bca0f707 100644 --- a/src/store/message-store-level.ts +++ b/src/store/message-store-level.ts @@ -10,6 +10,7 @@ import { createLevelDatabase } from './level-wrapper.js'; import { executeUnlessAborted } from '../utils/abort.js'; import { IndexLevel } from './index-level.js'; import { sha256 } from 'multiformats/hashes/sha2'; +import { Cid, Message } from '../index.js'; /** * A simple implementation of {@link MessageStore} that works in both the browser and server-side. @@ -113,14 +114,16 @@ export class MessageStoreLevel implements MessageStore { const encodedMessageBlock = await executeUnlessAborted(block.encode({ value: message, codec: cbor, hasher: sha256 }), options?.signal); - await partition.put(encodedMessageBlock.cid, encodedMessageBlock.bytes, options); + // MessageStore data may contain `encodedData` which is not taken into account when calculating the blockCID as it is optional data. + const messageCid = Cid.parseCid(await Message.getCid(message)); + await partition.put(messageCid, encodedMessageBlock.bytes, options); - const encodedMessageBlockCid = encodedMessageBlock.cid.toString(); + const messageCidString = messageCid.toString(); const indexDocument = { ...indexes, tenant, }; - await this.index.put(encodedMessageBlockCid, indexDocument, options); + await this.index.put(messageCidString, indexDocument, options); } /** diff --git a/src/store/storage-controller.ts b/src/store/storage-controller.ts index 3c6fbc78f..6e0213c9d 100644 --- a/src/store/storage-controller.ts +++ b/src/store/storage-controller.ts @@ -2,12 +2,10 @@ import type { DataStore } from '../types/data-store.js'; import type { EventLog } from '../types/event-log.js'; import type { MessageStore } from '../types/message-store.js'; import type { RecordsWriteMessage } from '../types/records-types.js'; -import type { Filter, GenericMessage, TimestampedMessage } from '../types/message-types.js'; +import type { GenericMessage, TimestampedMessage } from '../types/message-types.js'; import { constructRecordsWriteIndexes } from '../handlers/records-write.js'; -import { DataStream } from '../utils/data-stream.js'; import { DwnConstant } from '../core/dwn-constant.js'; -import { Encoder } from '../utils/encoder.js'; import { RecordsWrite } from '../interfaces/records-write.js'; import { DwnMethodName, Message } from '../core/message.js'; @@ -15,33 +13,6 @@ import { DwnMethodName, Message } from '../core/message.js'; * A class that provides an abstraction for the usage of MessageStore, DataStore, and EventLog. */ export class StorageController { - public static async query( - messageStore: MessageStore, - dataStore: DataStore, - tenant: string, - filter: Filter - ): Promise { - - const messages: RecordsWriteMessageWithOptionalEncodedData[] = (await messageStore.query(tenant, filter)) as RecordsWriteMessage[]; - - // for every message, only include the data as `encodedData` if the data size is equal or smaller than the size threshold - for (const message of messages) { - const dataCid = message.descriptor.dataCid; - const dataSize = message.descriptor.dataSize; - if (dataCid !== undefined && dataSize! <= DwnConstant.maxDataSizeAllowedToBeEncoded) { - const messageCid = await Message.getCid(message); - const result = await dataStore.get(tenant, messageCid, dataCid); - - if (result) { - const dataBytes = await DataStream.toBytes(result.dataStream); - message.encodedData = Encoder.bytesToBase64Url(dataBytes); - } - } - } - - return messages; - } - /** * Deletes a message. */ @@ -53,7 +24,8 @@ export class StorageController { ): Promise { const messageCid = await Message.getCid(message); - if (message.descriptor.method === DwnMethodName.Write) { + if (message.descriptor.method === DwnMethodName.Write && + (message as RecordsWriteMessage).descriptor.dataSize > DwnConstant.maxDataSizeAllowedToBeEncoded) { const recordsWriteMessage = message as RecordsWriteMessage; await dataStore.delete(tenant, messageCid, recordsWriteMessage.descriptor.dataCid); } @@ -93,7 +65,9 @@ export class StorageController { const existingRecordsWrite = await RecordsWrite.parse(message as RecordsWriteMessage); const isLatestBaseState = false; const indexes = await constructRecordsWriteIndexes(existingRecordsWrite, isLatestBaseState); - await messageStore.put(tenant, message, indexes); + const writeMessage = message as RecordsWriteMessageWithOptionalEncodedData; + delete writeMessage.encodedData; + await messageStore.put(tenant, writeMessage, indexes); } else { const messageCid = await Message.getCid(message); deletedMessageCids.push(messageCid); @@ -105,4 +79,5 @@ export class StorageController { } } +// records with a data size below a threshold are stored within MessageStore with their data embedded export type RecordsWriteMessageWithOptionalEncodedData = RecordsWriteMessage & { encodedData?: string }; diff --git a/tests/core/message.spec.ts b/tests/core/message.spec.ts index df0e6ba10..6f62defb9 100644 --- a/tests/core/message.spec.ts +++ b/tests/core/message.spec.ts @@ -1,3 +1,5 @@ +import type { RecordsWriteMessageWithOptionalEncodedData } from '../../src/store/storage-controller.js'; + import { expect } from 'chai'; import { Message } from '../../src/core/message.js'; import { RecordsRead } from '../../src/index.js'; @@ -65,4 +67,18 @@ describe('Message', () => { expect((newestMessage as any).recordId).to.equal(a.recordId); }); }); + + describe('getCid()', () => { + it('encodedData does not have an effect on getCid()', async () => { + const { message } = await TestDataGenerator.generateRecordsWrite(); + const cid1 = await Message.getCid(message); + + const messageWithData: RecordsWriteMessageWithOptionalEncodedData = message; + messageWithData.encodedData = TestDataGenerator.randomString(25); + + const cid2 = await Message.getCid(messageWithData); + + expect(cid1).to.equal(cid2); + }); + }); }); \ No newline at end of file diff --git a/tests/handlers/messages-get.spec.ts b/tests/handlers/messages-get.spec.ts index 21af80b1a..4d4f7cd95 100644 --- a/tests/handlers/messages-get.spec.ts +++ b/tests/handlers/messages-get.spec.ts @@ -11,8 +11,7 @@ import { MessagesGetHandler } from '../../src/handlers/messages-get.js'; import { stubInterface } from 'ts-sinon'; import { TestDataGenerator } from '../utils/test-data-generator.js'; import { TestStores } from '../test-stores.js'; - -import { DidKeyResolver, DidResolver, Dwn } from '../../src/index.js'; +import { DidKeyResolver, DidResolver, Dwn, DwnConstant } from '../../src/index.js'; import sinon from 'sinon'; @@ -212,7 +211,8 @@ export function testMessagesGetHandler(): void { const alice = await DidKeyResolver.generate(); const { recordsWrite, dataStream } = await TestDataGenerator.generateRecordsWrite({ - author: alice + author : alice, + data : TestDataGenerator.randomBytes(DwnConstant.maxDataSizeAllowedToBeEncoded), }); const reply = await dwn.processMessage(alice.did, recordsWrite.toJSON(), dataStream); diff --git a/tests/handlers/records-query.spec.ts b/tests/handlers/records-query.spec.ts index f0065a240..f5797660a 100644 --- a/tests/handlers/records-query.spec.ts +++ b/tests/handlers/records-query.spec.ts @@ -518,22 +518,22 @@ export function testRecordsQueryHandler(): void { const recordsWriteHandler = new RecordsWriteHandler(didResolver, messageStore, dataStore, eventLog); const additionalIndexes1 = await constructRecordsWriteIndexes(record1Data.recordsWrite, true); - await recordsWriteHandler.putData(alice.did, record1Data.message, record1Data.dataStream); + record1Data.message = await recordsWriteHandler.processEncodedData(record1Data.message, record1Data.dataStream); await messageStore.put(alice.did, record1Data.message, additionalIndexes1); await eventLog.append(alice.did, await Message.getCid(record1Data.message)); const additionalIndexes2 = await constructRecordsWriteIndexes(record2Data.recordsWrite, true); - await recordsWriteHandler.putData(alice.did, record2Data.message, record2Data.dataStream); + record2Data.message = await recordsWriteHandler.processEncodedData(record2Data.message, record2Data.dataStream); await messageStore.put(alice.did, record2Data.message, additionalIndexes2); await eventLog.append(alice.did, await Message.getCid(record2Data.message)); const additionalIndexes3 = await constructRecordsWriteIndexes(record3Data.recordsWrite, true); - await recordsWriteHandler.putData(alice.did, record3Data.message, record3Data.dataStream); + record3Data.message = await recordsWriteHandler.processEncodedData(record3Data.message, record3Data.dataStream); await messageStore.put(alice.did, record3Data.message, additionalIndexes3); await eventLog.append(alice.did, await Message.getCid(record3Data.message)); const additionalIndexes4 = await constructRecordsWriteIndexes(record4Data.recordsWrite, true); - await recordsWriteHandler.putData(alice.did, record4Data.message, record4Data.dataStream); + record4Data.message = await recordsWriteHandler.processEncodedData(record4Data.message, record4Data.dataStream); await messageStore.put(alice.did, record4Data.message, additionalIndexes4); await eventLog.append(alice.did, await Message.getCid(record4Data.message)); diff --git a/tests/handlers/records-read.spec.ts b/tests/handlers/records-read.spec.ts index c7199706b..af44eaef0 100644 --- a/tests/handlers/records-read.spec.ts +++ b/tests/handlers/records-read.spec.ts @@ -29,7 +29,7 @@ import { TestDataGenerator } from '../utils/test-data-generator.js'; import { TestStores } from '../test-stores.js'; import { TestStubGenerator } from '../utils/test-stub-generator.js'; -import { DataStream, DidResolver, Dwn, Jws, Protocols, ProtocolsConfigure, ProtocolsQuery, Records, RecordsDelete, RecordsRead , RecordsWrite, Secp256k1 } from '../../src/index.js'; +import { DataStream, DidResolver, Dwn, DwnConstant, Jws, Protocols, ProtocolsConfigure, ProtocolsQuery, Records, RecordsDelete, RecordsRead , RecordsWrite, Secp256k1 } from '../../src/index.js'; chai.use(chaiAsPromised); @@ -847,13 +847,16 @@ export function testRecordsReadHandler(): void { expect(readReply.status.code).to.equal(404); }); - it('should return 404 underlying data store cannot locate the data', async () => { + it('should return 404 underlying data store cannot locate the data when data is above threshold', async () => { const alice = await DidKeyResolver.generate(); sinon.stub(dataStore, 'get').resolves(undefined); - // insert data - const { message, dataStream } = await TestDataGenerator.generateRecordsWrite({ author: alice }); + // insert data larger than the allowed amount in encodedData + const { message, dataStream } = await TestDataGenerator.generateRecordsWrite({ + author : alice, + data : TestDataGenerator.randomBytes(DwnConstant.maxDataSizeAllowedToBeEncoded +1) + }); const writeReply = await dwn.processMessage(alice.did, message, dataStream); expect(writeReply.status.code).to.equal(202); @@ -867,6 +870,65 @@ export function testRecordsReadHandler(): void { expect(readReply.status.code).to.equal(404); }); + describe('data from encodedData', () => { + it('should not get data from DataStore if encodedData exists', async () => { + const alice = await DidKeyResolver.generate(); + + //since the data is at the threshold it will be returned from the messageStore in the `encodedData` field. + const { message, dataStream, dataBytes } = await TestDataGenerator.generateRecordsWrite({ + author : alice, + data : TestDataGenerator.randomBytes(DwnConstant.maxDataSizeAllowedToBeEncoded) + }); + + const writeReply = await dwn.processMessage(alice.did, message, dataStream); + expect(writeReply.status.code).to.equal(202); + + const recordRead = await RecordsRead.create({ + recordId : message.recordId, + authorizationSignatureInput : Jws.createSignatureInput(alice) + }); + + const dataStoreGet = sinon.spy(dataStore, 'get'); + + const recordsReadResponse = await dwn.handleRecordsRead(alice.did, recordRead.message); + expect(recordsReadResponse.status.code).to.equal(200); + expect(recordsReadResponse.record).to.exist; + expect(recordsReadResponse.record!.data).to.exist; + sinon.assert.notCalled(dataStoreGet); + + const readData = await DataStream.toBytes(recordsReadResponse.record!.data); + expect(readData).to.eql(dataBytes); + }); + it('should get data from DataStore if encodedData does not exist', async () => { + const alice = await DidKeyResolver.generate(); + + //since the data is over the threshold it will not be returned from the messageStore in the `encodedData` field. + const { message, dataStream, dataBytes } = await TestDataGenerator.generateRecordsWrite({ + author : alice, + data : TestDataGenerator.randomBytes(DwnConstant.maxDataSizeAllowedToBeEncoded +1) + }); + + const writeReply = await dwn.processMessage(alice.did, message, dataStream); + expect(writeReply.status.code).to.equal(202); + + const recordRead = await RecordsRead.create({ + recordId : message.recordId, + authorizationSignatureInput : Jws.createSignatureInput(alice) + }); + + const dataStoreGet = sinon.spy(dataStore, 'get'); + + const recordsReadResponse = await dwn.handleRecordsRead(alice.did, recordRead.message); + expect(recordsReadResponse.status.code).to.equal(200); + expect(recordsReadResponse.record).to.exist; + expect(recordsReadResponse.record!.data).to.exist; + sinon.assert.calledOnce(dataStoreGet); + + const readData = await DataStream.toBytes(recordsReadResponse.record!.data); + expect(readData).to.eql(dataBytes); + }); + }); + describe('encryption scenarios', () => { it('should be able to decrypt flat-space schema-contained record with a correct derived key', async () => { // scenario: Alice writes into her own DWN an encrypted record and she is able to decrypt it diff --git a/tests/handlers/records-write.spec.ts b/tests/handlers/records-write.spec.ts index ddd739c4c..a977caa48 100644 --- a/tests/handlers/records-write.spec.ts +++ b/tests/handlers/records-write.spec.ts @@ -3,6 +3,7 @@ import type { GenerateFromRecordsWriteOut } from '../utils/test-data-generator.j import type { ProtocolDefinition } from '../../src/types/protocols-types.js'; import type { QueryResultEntry } from '../../src/types/message-types.js'; import type { RecordsWriteMessage } from '../../src/types/records-types.js'; +import type { RecordsWriteMessageWithOptionalEncodedData } from '../../src/store/storage-controller.js'; import type { DataStore, EventLog, MessageStore } from '../../src/index.js'; import chaiAsPromised from 'chai-as-promised'; @@ -32,7 +33,6 @@ import { Encoder } from '../../src/utils/encoder.js'; import { GeneralJwsSigner } from '../../src/jose/jws/general/signer.js'; import { getCurrentTimeInHighPrecision } from '../../src/utils/time.js'; import { Jws } from '../../src/utils/jws.js'; -import { KeyDerivationScheme } from '../../src/index.js'; import { Message } from '../../src/core/message.js'; import { RecordsRead } from '../../src/interfaces/records-read.js'; import { RecordsWrite } from '../../src/interfaces/records-write.js'; @@ -41,7 +41,7 @@ import { stubInterface } from 'ts-sinon'; import { TestDataGenerator } from '../utils/test-data-generator.js'; import { TestStores } from '../test-stores.js'; import { TestStubGenerator } from '../utils/test-stub-generator.js'; - +import { DwnConstant, KeyDerivationScheme, RecordsDelete } from '../../src/index.js'; import { Encryption, EncryptionAlgorithm } from '../../src/utils/encryption.js'; chai.use(chaiAsPromised); @@ -81,7 +81,6 @@ export function testRecordsWriteHandler(): void { after(async () => { await dwn.close(); }); - it('should only be able to overwrite existing record if new record has a later `messageTimestamp` value', async () => { // write a message into DB const author = await DidKeyResolver.generate(); @@ -279,35 +278,306 @@ export function testRecordsWriteHandler(): void { expect(reply.status.detail).to.contain('dataFormat is an immutable property'); }); - it('should return 400 if actual data size mismatches with `dataSize` in descriptor', async () => { + it('should inherit data from previous RecordsWrite given a matching dataCid and dataSize and no dataStream', async () => { + const { message, author, dataStream, dataBytes } = await TestDataGenerator.generateRecordsWrite({ + published: false + }); + const tenant = author.did; + + TestStubGenerator.stubDidResolver(didResolver, [author]); + + const initialWriteReply = await dwn.processMessage(tenant, message, dataStream); + expect(initialWriteReply.status.code).to.equal(202); + + const write2 = await RecordsWrite.createFrom({ + unsignedRecordsWriteMessage : message, + published : true, + authorizationSignatureInput : Jws.createSignatureInput(author), + }); + + const writeUpdateReply = await dwn.processMessage(tenant, write2.message); + expect(writeUpdateReply.status.code).to.equal(202); + const readMessage = await RecordsRead.create({ + recordId: message.recordId, + }); + + const readMessageReply = await dwn.handleRecordsRead(tenant, readMessage.message); + expect(readMessageReply.status.code).to.equal(200); + expect(readMessageReply.record).to.exist; + const data = await DataStream.toBytes(readMessageReply.record!.data); + expect(data).to.eql(dataBytes); + }); + + describe('should inherit data from previous RecordsWrite given a matching dataCid and dataSize and no dataStream', () => { + it('with data above the threshold for encodedData', async () => { + const { message, author, dataStream, dataBytes } = await TestDataGenerator.generateRecordsWrite({ + data : TestDataGenerator.randomBytes(DwnConstant.maxDataSizeAllowedToBeEncoded + 1), + published : false + }); + const tenant = author.did; + + TestStubGenerator.stubDidResolver(didResolver, [author]); + + const initialWriteReply = await dwn.processMessage(tenant, message, dataStream); + expect(initialWriteReply.status.code).to.equal(202); + + const write2 = await RecordsWrite.createFrom({ + unsignedRecordsWriteMessage : message, + published : true, + authorizationSignatureInput : Jws.createSignatureInput(author), + }); + + const writeUpdateReply = await dwn.processMessage(tenant, write2.message); + expect(writeUpdateReply.status.code).to.equal(202); + const readMessage = await RecordsRead.create({ + recordId: message.recordId, + }); + + const readMessageReply = await dwn.handleRecordsRead(tenant, readMessage.message); + expect(readMessageReply.status.code).to.equal(200); + expect(readMessageReply.record).to.exist; + const data = await DataStream.toBytes(readMessageReply.record!.data); + expect(data).to.eql(dataBytes); + }); + + it('with data equal to or below the threshold for encodedData', async () => { + const { message, author, dataStream, dataBytes } = await TestDataGenerator.generateRecordsWrite({ + data : TestDataGenerator.randomBytes(DwnConstant.maxDataSizeAllowedToBeEncoded), + published : false + }); + const tenant = author.did; + + TestStubGenerator.stubDidResolver(didResolver, [author]); + + const initialWriteReply = await dwn.processMessage(tenant, message, dataStream); + expect(initialWriteReply.status.code).to.equal(202); + + const write2 = await RecordsWrite.createFrom({ + unsignedRecordsWriteMessage : message, + published : true, + authorizationSignatureInput : Jws.createSignatureInput(author), + }); + + const writeUpdateReply = await dwn.processMessage(tenant, write2.message); + expect(writeUpdateReply.status.code).to.equal(202); + const readMessage = await RecordsRead.create({ + recordId: message.recordId, + }); + + const readMessageReply = await dwn.handleRecordsRead(tenant, readMessage.message); + expect(readMessageReply.status.code).to.equal(200); + expect(readMessageReply.record).to.exist; + const data = await DataStream.toBytes(readMessageReply.record!.data); + expect(data).to.eql(dataBytes); + }); + }); + + describe('should return 400 if actual data size mismatches with `dataSize` in descriptor', () => { + it('with dataStream and `dataSize` larger than encodedData threshold', async () => { + const alice = await DidKeyResolver.generate(); + const { message, dataStream } = await TestDataGenerator.generateRecordsWrite({ + author : alice, + data : TestDataGenerator.randomBytes(DwnConstant.maxDataSizeAllowedToBeEncoded + 1) + }); + + // replace the dataSize to simulate mismatch, will need to generate `recordId` and `authorization` property again + message.descriptor.dataSize = DwnConstant.maxDataSizeAllowedToBeEncoded + 100; + const descriptorCid = await Cid.computeCid(message.descriptor); + const recordId = await RecordsWrite.getEntryId(alice.did, message.descriptor); + const authorizationSignatureInput = Jws.createSignatureInput(alice); + const authorization = await RecordsWrite['createAuthorization'](recordId, message.contextId, descriptorCid, message.attestation, message.encryption, authorizationSignatureInput); + message.recordId = recordId; + message.authorization = authorization; + + const reply = await dwn.processMessage(alice.did, message, dataStream); + expect(reply.status.code).to.equal(400); + expect(reply.status.detail).to.contain(DwnErrorCode.RecordsWriteDataSizeMismatch); + }); + + it('with only `dataSize` larger than encodedData threshold', async () => { + const alice = await DidKeyResolver.generate(); + const { message, dataStream } = await TestDataGenerator.generateRecordsWrite({ + author : alice, + data : TestDataGenerator.randomBytes(DwnConstant.maxDataSizeAllowedToBeEncoded) + }); + + // replace the dataSize to simulate mismatch, will need to generate `recordId` and `authorization` property again + message.descriptor.dataSize = DwnConstant.maxDataSizeAllowedToBeEncoded + 100; + const descriptorCid = await Cid.computeCid(message.descriptor); + const recordId = await RecordsWrite.getEntryId(alice.did, message.descriptor); + const authorizationSignatureInput = Jws.createSignatureInput(alice); + const authorization = await RecordsWrite['createAuthorization'](recordId, message.contextId, descriptorCid, message.attestation, message.encryption, authorizationSignatureInput); + message.recordId = recordId; + message.authorization = authorization; + + const reply = await dwn.processMessage(alice.did, message, dataStream); + expect(reply.status.code).to.equal(400); + expect(reply.status.detail).to.contain(DwnErrorCode.RecordsWriteDataSizeMismatch); + }); + + it('with only dataStream larger than encodedData threshold', async () => { + const alice = await DidKeyResolver.generate(); + const { message, dataStream } = await TestDataGenerator.generateRecordsWrite({ + author : alice, + data : TestDataGenerator.randomBytes(DwnConstant.maxDataSizeAllowedToBeEncoded + 1) + }); + + // replace the dataSize to simulate mismatch, will need to generate `recordId` and `authorization` property again + message.descriptor.dataSize = 1; + const descriptorCid = await Cid.computeCid(message.descriptor); + const recordId = await RecordsWrite.getEntryId(alice.did, message.descriptor); + const authorizationSignatureInput = Jws.createSignatureInput(alice); + const authorization = await RecordsWrite['createAuthorization'](recordId, message.contextId, descriptorCid, message.attestation, message.encryption, authorizationSignatureInput); + message.recordId = recordId; + message.authorization = authorization; + + const reply = await dwn.processMessage(alice.did, message, dataStream); + expect(reply.status.code).to.equal(400); + expect(reply.status.detail).to.contain(DwnErrorCode.RecordsWriteDataSizeMismatch); + }); + + it('with both `dataSize` and dataStream below than encodedData threshold', async () => { + const alice = await DidKeyResolver.generate(); + const { message, dataStream } = await TestDataGenerator.generateRecordsWrite({ + author: alice + }); + + // replace the dataSize to simulate mismatch, will need to generate `recordId` and `authorization` property again + message.descriptor.dataSize = 1; + const descriptorCid = await Cid.computeCid(message.descriptor); + const recordId = await RecordsWrite.getEntryId(alice.did, message.descriptor); + const authorizationSignatureInput = Jws.createSignatureInput(alice); + const authorization = await RecordsWrite['createAuthorization'](recordId, message.contextId, descriptorCid, message.attestation, message.encryption, authorizationSignatureInput); + message.recordId = recordId; + message.authorization = authorization; + + const reply = await dwn.processMessage(alice.did, message, dataStream); + expect(reply.status.code).to.equal(400); + expect(reply.status.detail).to.contain(DwnErrorCode.RecordsWriteDataSizeMismatch); + }); + }); + + it('should return 400 for if dataStream is not present for a write after a delete', async () => { + const { message, author, dataStream, dataBytes } = await TestDataGenerator.generateRecordsWrite({ + data : TestDataGenerator.randomBytes(DwnConstant.maxDataSizeAllowedToBeEncoded), + published : false + }); + const tenant = author.did; + + TestStubGenerator.stubDidResolver(didResolver, [author]); + + const initialWriteReply = await dwn.processMessage(tenant, message, dataStream); + expect(initialWriteReply.status.code).to.equal(202); + + const recordsDelete = await RecordsDelete.create({ + recordId : message.recordId, + authorizationSignatureInput : Jws.createSignatureInput(author), + }); + const deleteReply = await dwn.processMessage(tenant, recordsDelete.message); + expect(deleteReply.status.code).to.equal(202); + + const write = await RecordsWrite.createFrom({ + unsignedRecordsWriteMessage : message, + authorizationSignatureInput : Jws.createSignatureInput(author), + }); + + const withoutDataReply = await dwn.processMessage(tenant, write.message); + expect(withoutDataReply.status.code).to.equal(400); + expect(withoutDataReply.status.detail).to.contain(DwnErrorCode.RecordsWriteMissingDataStream); + const updatedWriteData = DataStream.fromBytes(dataBytes!); + const withoutDataReply2 = await dwn.processMessage(tenant, write.message, updatedWriteData); + expect(withoutDataReply2.status.code).to.equal(202); + }); + + it('should return 400 for if dataStream is not present for a write after a delete with data above the threshold', async () => { + const { message, author, dataStream, dataBytes } = await TestDataGenerator.generateRecordsWrite({ + data : TestDataGenerator.randomBytes(DwnConstant.maxDataSizeAllowedToBeEncoded + 1), + published : false + }); + const tenant = author.did; + + TestStubGenerator.stubDidResolver(didResolver, [author]); + + const initialWriteReply = await dwn.processMessage(tenant, message, dataStream); + expect(initialWriteReply.status.code).to.equal(202); + + const recordsDelete = await RecordsDelete.create({ + recordId : message.recordId, + authorizationSignatureInput : Jws.createSignatureInput(author), + }); + const deleteReply = await dwn.processMessage(tenant, recordsDelete.message); + expect(deleteReply.status.code).to.equal(202); + + const write = await RecordsWrite.createFrom({ + unsignedRecordsWriteMessage : message, + authorizationSignatureInput : Jws.createSignatureInput(author), + }); + + const withoutDataReply = await dwn.processMessage(tenant, write.message); + expect(withoutDataReply.status.code).to.equal(400); + expect(withoutDataReply.status.detail).to.contain(DwnErrorCode.RecordsWriteMissingDataStream); + const updatedWriteData = DataStream.fromBytes(dataBytes!); + const withoutDataReply2 = await dwn.processMessage(tenant, write.message, updatedWriteData); + expect(withoutDataReply2.status.code).to.equal(202); + }); + + it('should return 400 for data CID mismatch with both dataStream and `dataSize` larger than encodedData threshold', async () => { const alice = await DidKeyResolver.generate(); - const { message, dataStream } = await TestDataGenerator.generateRecordsWrite({ author: alice }); + const { message } = await TestDataGenerator.generateRecordsWrite({ + author : alice, + data : TestDataGenerator.randomBytes(DwnConstant.maxDataSizeAllowedToBeEncoded + 1) + }); + const dataStream = + DataStream.fromBytes(TestDataGenerator.randomBytes(DwnConstant.maxDataSizeAllowedToBeEncoded + 1)); // mismatch data stream - // replace the dataSize to simulate mismatch, will need to generate `recordId` and `authorization` property again - message.descriptor.dataSize = 1; - const descriptorCid = await Cid.computeCid(message.descriptor); - const recordId = await RecordsWrite.getEntryId(alice.did, message.descriptor); - const authorizationSignatureInput = Jws.createSignatureInput(alice); - const authorization = await RecordsWrite['createAuthorization'](recordId, message.contextId, descriptorCid, message.attestation, message.encryption, authorizationSignatureInput); - message.recordId = recordId; - message.authorization = authorization; + const reply = await dwn.processMessage(alice.did, message, dataStream); + expect(reply.status.code).to.equal(400); + expect(reply.status.detail).to.contain(DwnErrorCode.RecordsWriteDataCidMismatch); + }); + + it('should return 400 for data CID mismatch with `dataSize` larger than encodedData threshold', async () => { + const alice = await DidKeyResolver.generate(); + const { message } = await TestDataGenerator.generateRecordsWrite({ + author : alice, + data : TestDataGenerator.randomBytes(DwnConstant.maxDataSizeAllowedToBeEncoded + 1) + }); + const dataStream = + DataStream.fromBytes(TestDataGenerator.randomBytes(DwnConstant.maxDataSizeAllowedToBeEncoded)); // mismatch data stream const reply = await dwn.processMessage(alice.did, message, dataStream); expect(reply.status.code).to.equal(400); - expect(reply.status.detail).to.contain('does not match dataSize in descriptor'); + expect(reply.status.detail).to.contain(DwnErrorCode.RecordsWriteDataCidMismatch); }); - it('should return 400 if actual data CID of mismatches with `dataCid` in descriptor', async () => { + it('should return 400 for data CID mismatch with dataStream larger than encodedData threshold', async () => { const alice = await DidKeyResolver.generate(); - const { message } = await TestDataGenerator.generateRecordsWrite({ author: alice }); - const dataStream = DataStream.fromBytes(TestDataGenerator.randomBytes(32)); // mismatch data stream + const { message } = await TestDataGenerator.generateRecordsWrite({ + author : alice, + data : TestDataGenerator.randomBytes(DwnConstant.maxDataSizeAllowedToBeEncoded) + }); + const dataStream = + DataStream.fromBytes(TestDataGenerator.randomBytes(DwnConstant.maxDataSizeAllowedToBeEncoded + 1)); // mismatch data stream const reply = await dwn.processMessage(alice.did, message, dataStream); expect(reply.status.code).to.equal(400); - expect(reply.status.detail).to.contain('does not match dataCid in descriptor'); + expect(reply.status.detail).to.contain(DwnErrorCode.RecordsWriteDataCidMismatch); }); - it('should return 400 if attempting to write a record without data stream', async () => { + it('should return 400 for data CID mismatch with both dataStream and `dataSize` below than encodedData threshold', async () => { + const alice = await DidKeyResolver.generate(); + const { message } = await TestDataGenerator.generateRecordsWrite({ + author : alice, + data : TestDataGenerator.randomBytes(DwnConstant.maxDataSizeAllowedToBeEncoded) + }); + const dataStream = + DataStream.fromBytes(TestDataGenerator.randomBytes(DwnConstant.maxDataSizeAllowedToBeEncoded)); // mismatch data stream + + const reply = await dwn.processMessage(alice.did, message, dataStream); + expect(reply.status.code).to.equal(400); + expect(reply.status.detail).to.contain(DwnErrorCode.RecordsWriteDataCidMismatch); + }); + + it('should return 400 if attempting to write a record without data stream or data in a previous write', async () => { const alice = await DidKeyResolver.generate(); const { message } = await TestDataGenerator.generateRecordsWrite({ @@ -317,7 +587,7 @@ export function testRecordsWriteHandler(): void { const reply = await dwn.processMessage(alice.did, message); expect(reply.status.code).to.equal(400); - expect(reply.status.detail).to.contain(DwnErrorCode.RecordsWriteMissingDataStream); + expect(reply.status.detail).to.contain(DwnErrorCode.RecordsWriteMissingDataInPrevious); }); it('#359 - should not allow access of data by referencing a different`dataCid` in "modify" `RecordsWrite`', async () => { @@ -360,7 +630,7 @@ export function testRecordsWriteHandler(): void { }); const write2ChangeReply = await dwn.processMessage(alice.did, write2Change.message); expect(write2ChangeReply.status.code).to.equal(400); // should be disallowed - expect(write2ChangeReply.status.detail).to.contain(DwnErrorCode.RecordsWriteMissingDataStream); + expect(write2ChangeReply.status.detail).to.contain(DwnErrorCode.RecordsWriteDataCidMismatch); // further sanity test to make sure the change is not written, ie. write2 still has the original data const read = await RecordsRead.create({ @@ -1817,7 +2087,7 @@ export function testRecordsWriteHandler(): void { }); const imageReply = await dwn.processMessage(alice.did, imageRecordsWrite.message, imageRecordsWrite.dataStream); expect(imageReply.status.code).to.equal(400); // should be disallowed - expect(imageReply.status.detail).to.contain(DwnErrorCode.RecordsWriteMissingDataStream); + expect(imageReply.status.detail).to.contain(DwnErrorCode.RecordsWriteMissingDataInPrevious); // further sanity test to make sure record is never written const bobRecordsReadData = await RecordsRead.create({ @@ -1835,8 +2105,9 @@ export function testRecordsWriteHandler(): void { // RecordsWrite for the same record, referencing the same `dataCid` but omitting the `dataStream`. // Pruned RecordsWrite + // Data large enough to use the DataStore const alice = await DidKeyResolver.generate(); - const data = Encoder.stringToBytes('data from bob'); + const data = TestDataGenerator.randomBytes(DwnConstant.maxDataSizeAllowedToBeEncoded + 1); const prunedRecordsWrite = await TestDataGenerator.generateRecordsWrite({ author : alice, published : false, @@ -1854,7 +2125,7 @@ export function testRecordsWriteHandler(): void { }); const recordsWriteReply = await dwn.processMessage(alice.did, recordsWrite.message); expect(recordsWriteReply.status.code).to.equal(400); - expect(recordsWriteReply.status.detail).to.contain(DwnErrorCode.RecordsWriteMissingData); + expect(recordsWriteReply.status.detail).to.contain(DwnErrorCode.RecordsWriteMissingDataAssociation); }); describe('reference counting tests', () => { @@ -1890,7 +2161,7 @@ export function testRecordsWriteHandler(): void { }); const bobAssociateReply = await dwn.processMessage(bob.did, bobAssociateData.message, bobAssociateData.dataStream); expect(bobAssociateReply.status.code).to.equal(400); // expecting an error - expect(bobAssociateReply.status.detail).to.contain(DwnErrorCode.RecordsWriteMissingDataStream); + expect(bobAssociateReply.status.detail).to.contain(DwnErrorCode.RecordsWriteMissingDataInPrevious); const aliceQueryWriteAfterBobAssociateData = await TestDataGenerator.generateRecordsQuery({ author : alice, @@ -1911,6 +2182,92 @@ export function testRecordsWriteHandler(): void { expect(bobQueryAssociateAfterBobAssociateReply.entries?.length).to.equal(0); }); }); + + describe('encodedData threshold', async () => { + it('should call processEncodedData and not putData if dataSize is less than or equal to the threshold', async () => { + const alice = await DidKeyResolver.generate(); + const dataBytes = TestDataGenerator.randomBytes(DwnConstant.maxDataSizeAllowedToBeEncoded); + const { message, dataStream } = await TestDataGenerator.generateRecordsWrite({ author: alice, data: dataBytes }); + const processEncoded = sinon.spy(RecordsWriteHandler.prototype, 'processEncodedData'); + const putData = sinon.spy(RecordsWriteHandler.prototype, 'putData'); + + const writeMessage = await dwn.processMessage(alice.did, message, dataStream); + expect(writeMessage.status.code).to.equal(202); + sinon.assert.calledOnce(processEncoded); + sinon.assert.notCalled(putData); + }); + + it('should call putData and not processEncodedData if dataSize is greater than the threshold', async () => { + const alice = await DidKeyResolver.generate(); + const dataBytes = TestDataGenerator.randomBytes(DwnConstant.maxDataSizeAllowedToBeEncoded + 1); + const { message, dataStream } = await TestDataGenerator.generateRecordsWrite({ author: alice, data: dataBytes }); + const processEncoded = sinon.spy(RecordsWriteHandler.prototype, 'processEncodedData'); + const putData = sinon.spy(RecordsWriteHandler.prototype, 'putData'); + + const writeMessage = await dwn.processMessage(alice.did, message, dataStream); + expect(writeMessage.status.code).to.equal(202); + sinon.assert.notCalled(processEncoded); + sinon.assert.calledOnce(putData); + }); + + it('should have encodedData field if dataSize is less than or equal to the threshold', async () => { + const alice = await DidKeyResolver.generate(); + const dataBytes = TestDataGenerator.randomBytes(DwnConstant.maxDataSizeAllowedToBeEncoded); + const { message, dataStream } = await TestDataGenerator.generateRecordsWrite({ author: alice, data: dataBytes }); + + const writeMessage = await dwn.processMessage(alice.did, message, dataStream); + expect(writeMessage.status.code).to.equal(202); + const messageCid = await Message.getCid(message); + + const storedMessage = await messageStore.get(alice.did, messageCid); + expect((storedMessage as RecordsWriteMessageWithOptionalEncodedData).encodedData).to.exist.and.not.be.undefined; + }); + + it('should not have encodedData field if dataSize greater than threshold', async () => { + const alice = await DidKeyResolver.generate(); + const dataBytes = TestDataGenerator.randomBytes(DwnConstant.maxDataSizeAllowedToBeEncoded + 1); + const { message, dataStream } = await TestDataGenerator.generateRecordsWrite({ author: alice, data: dataBytes }); + + const writeMessage = await dwn.processMessage(alice.did, message, dataStream); + expect(writeMessage.status.code).to.equal(202); + const messageCid = await Message.getCid(message); + + const storedMessage = await messageStore.get(alice.did, messageCid); + expect((storedMessage as RecordsWriteMessageWithOptionalEncodedData).encodedData).to.not.exist; + }); + + it('should retain original RecordsWrite message but without the encodedData if data is under threshold', async () => { + const alice = await DidKeyResolver.generate(); + const dataBytes = TestDataGenerator.randomBytes(DwnConstant.maxDataSizeAllowedToBeEncoded); + const { message, dataStream } = await TestDataGenerator.generateRecordsWrite({ author: alice, data: dataBytes }); + + const writeMessage = await dwn.processMessage(alice.did, message, dataStream); + expect(writeMessage.status.code).to.equal(202); + const messageCid = await Message.getCid(message); + + const storedMessage = await messageStore.get(alice.did, messageCid); + expect((storedMessage as RecordsWriteMessageWithOptionalEncodedData).encodedData).to.exist.and.not.be.undefined; + + const updatedDataBytes = TestDataGenerator.randomBytes(DwnConstant.maxDataSizeAllowedToBeEncoded); + const newWrite = await RecordsWrite.createFrom({ + unsignedRecordsWriteMessage : message, + published : true, + authorizationSignatureInput : Jws.createSignatureInput(alice), + data : updatedDataBytes, + }); + + const updateDataStream = DataStream.fromBytes(updatedDataBytes); + + const writeMessage2 = await dwn.processMessage(alice.did, newWrite.message, updateDataStream); + expect(writeMessage2.status.code).to.equal(202); + + const originalWrite = await messageStore.get(alice.did, messageCid); + expect((originalWrite as RecordsWriteMessageWithOptionalEncodedData).encodedData).to.not.exist; + + const newestWrite = await messageStore.get(alice.did, await Message.getCid(newWrite.message)); + expect((newestWrite as RecordsWriteMessageWithOptionalEncodedData).encodedData).to.exist.and.not.be.undefined; + }); + }); }); describe('authorization validation tests', () => { @@ -2073,7 +2430,11 @@ export function testRecordsWriteHandler(): void { }); it('should throw if `recordsWritehandler.putData()` throws unknown error', async () => { - const { author, message, dataStream } = await TestDataGenerator.generateRecordsWrite(); + + // must generate a large enough data payload for putData to be triggered + const { author, message, dataStream } = await TestDataGenerator.generateRecordsWrite({ + data: TestDataGenerator.randomBytes(DwnConstant.maxDataSizeAllowedToBeEncoded + 1) + }); const tenant = author.did; const didResolverStub = TestStubGenerator.createDidResolverStub(author);