From ebf53390358b99957318afefacd3a2f19395fa04 Mon Sep 17 00:00:00 2001 From: hammadb Date: Tue, 12 Mar 2024 11:18:49 -0700 Subject: [PATCH] wip --- .../blockstore/arrow_blockfile/block/delta.rs | 21 ++++++++++++++++++- rust/worker/src/segment/record_segment.rs | 1 - 2 files changed, 20 insertions(+), 2 deletions(-) diff --git a/rust/worker/src/blockstore/arrow_blockfile/block/delta.rs b/rust/worker/src/blockstore/arrow_blockfile/block/delta.rs index b3f5944ae9d..eaf8bed6d6c 100644 --- a/rust/worker/src/blockstore/arrow_blockfile/block/delta.rs +++ b/rust/worker/src/blockstore/arrow_blockfile/block/delta.rs @@ -5,7 +5,10 @@ use crate::blockstore::{ }; use arrow::util::bit_util; use parking_lot::RwLock; -use std::{collections::BTreeMap, sync::Arc}; +use std::{ + collections::{BTreeMap, HashMap}, + sync::Arc, +}; /// A block delta tracks a source block and represents the new state of a block. Blocks are /// immutable, so when a write is made to a block, a new block is created with the new state. @@ -139,6 +142,10 @@ impl BlockDelta { struct BlockDeltaInner { new_data: BTreeMap, + // A cache of the metadata json size for each blockfile key. This is used to avoid + // reserializing the metadata json for each blockfile key. It may be heavy on memory + // but we can easily optimize this later. + metadata_json_cache: HashMap, } impl BlockDeltaInner { @@ -228,6 +235,18 @@ impl BlockDeltaInner { fn get_metadata_size(&self) -> usize { self.new_data.iter().fold(0, |acc, (_, value)| match value { Value::EmbeddingRecordValue(embedding_record) => { + match &embedding_record.metadata { + Some(metadata) => { + // RESUME POINT + let as_str = match serde_json::to_string(metadata) { + Ok(s) => s + Err(_) => // TODO: log error + }; + let len = as_str.len(); + acc + len + } + None => 0, + } // TODO: use real metadata length acc + 3 } diff --git a/rust/worker/src/segment/record_segment.rs b/rust/worker/src/segment/record_segment.rs index 6b34a020149..dbae2651bd5 100644 --- a/rust/worker/src/segment/record_segment.rs +++ b/rust/worker/src/segment/record_segment.rs @@ -28,7 +28,6 @@ impl RecordSegment { blockfile_provider.create("user_id_to_offset_id", KeyType::String, ValueType::Uint); let id_to_user_id = blockfile_provider.create("offset_id_to_user_id", KeyType::Uint, ValueType::String); - // TODO: add embedding record as a value type let records = blockfile_provider.create("record", KeyType::Uint, ValueType::EmbeddingRecord);