Skip to content

Commit

Permalink
Merge pull request #105 from buhe/pa
Browse files Browse the repository at this point in the history
  • Loading branch information
buhe authored Feb 17, 2024
2 parents 292812a + 8708f4d commit 8cc143b
Show file tree
Hide file tree
Showing 9 changed files with 73 additions and 18 deletions.
8 changes: 4 additions & 4 deletions Package.resolved
Original file line number Diff line number Diff line change
Expand Up @@ -104,8 +104,8 @@
"kind" : "remoteSourceControl",
"location" : "https://github.com/buhe/similarity-search-kit",
"state" : {
"revision" : "7bf84238c1d98d159063385a7be34620b8c640d2",
"version" : "0.0.14"
"revision" : "39bbd0b5c2662728792ee9be132212e5fd4a49e2",
"version" : "0.0.16"
}
},
{
Expand Down Expand Up @@ -266,8 +266,8 @@
"kind" : "remoteSourceControl",
"location" : "https://github.com/buhe/SwiftyNotion",
"state" : {
"revision" : "8ff884579370c0ebbc81ae1d6b8e9b62076d25b7",
"version" : "0.1.2"
"revision" : "61af40905bfd5eb8cad454e02095dfbdea0498cb",
"version" : "0.1.3"
}
},
{
Expand Down
2 changes: 1 addition & 1 deletion Package.swift
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,7 @@ let package = Package(
.package(url: "https://github.com/juyan/swift-filestore", .upToNextMajor(from: "0.5.0")),
.package(url: "https://github.com/buhe/similarity-search-kit", from: "0.0.16"),
.package(url: "https://github.com/google/generative-ai-swift", .upToNextMajor(from: "0.4.4")),
.package(url: "https://github.com/buhe/SwiftyNotion", .upToNextMajor(from: "0.1.2")),
.package(url: "https://github.com/buhe/SwiftyNotion", .upToNextMajor(from: "0.1.3")),
.package(url: "https://github.com/nmdias/FeedKit", .upToNextMajor(from: "9.1.2")),
],
targets: [
Expand Down
9 changes: 8 additions & 1 deletion Sources/LangChain/document_loaders/BaseLoader.swift
Original file line number Diff line number Diff line change
Expand Up @@ -6,9 +6,16 @@
//

import Foundation
public struct Document {
/// A piece of text (`page_content`) together with string-keyed metadata.
public struct Document: Equatable {
/// Creates a document from its text and its metadata dictionary.
public init(page_content: String, metadata: [String : String]) {
self.page_content = page_content
self.metadata = metadata
}
/// The text of the document; immutable after creation.
public let page_content: String
/// Mutable string-to-string metadata attached to the document.
public var metadata: [String: String]
/// Two documents compare equal when their `page_content` is equal.
/// Note that `metadata` is NOT part of the comparison, so documents that differ
/// only in metadata are treated as the same value.
public static func == (lhs: Document, rhs: Document) -> Bool {
return lhs.page_content == rhs.page_content
}
}
public class BaseLoader {

Expand Down
30 changes: 28 additions & 2 deletions Sources/LangChain/retriever/ParentDocumentRetriever.swift
Original file line number Diff line number Diff line change
Expand Up @@ -4,8 +4,9 @@
//
// Created by 顾艳华 on 2023/11/17.
//

import CryptoKit
import Foundation

public class ParentDocumentRetriever: MultiVectorRetriever {
public init(child_splitter: TextSplitter, parent_splitter: TextSplitter? = nil, vectorstore: VectorStore, docstore: BaseStore) {
self.child_splitter = child_splitter
Expand All @@ -19,7 +20,10 @@ public class ParentDocumentRetriever: MultiVectorRetriever {
let parent_splitter: TextSplitter?
//The text splitter to use to create parent documents.
//If none, then the parent documents will be the raw documents passed in.
public func add_documents(documents: [Document]) async {
public func add_documents(documents: [Document]) async -> [String] {
if documents.isEmpty {
return []
}
var parent_documents: [Document]
if let p = self.parent_splitter {
parent_documents = p.split_documents(documents: documents)
Expand All @@ -42,6 +46,28 @@ public class ParentDocumentRetriever: MultiVectorRetriever {
await self.vectorstore.add_documents(documents: docs)
await self.docstore.mset(kvpairs: full_docs)
print("🚀 End add sub document \(docs.count), main document \(full_docs.count)")
return doc_ids
}

/// Removes parent documents from the docstore and their child chunks from the vector store.
///
/// The docstore entries are deleted by the key found under `metadata["id"]`. The child
/// chunks are re-derived by running `child_splitter` over each parent document, and are
/// removed from the vector store by the SHA-256 hex digest of each chunk's `page_content`.
///
/// - Parameter documents: Parent documents to remove. An empty array is a no-op.
///   Documents without an `"id"` metadata entry are skipped for the docstore delete
///   (with a console warning) instead of crashing; their child chunks are still removed.
public func remove_documents(documents: [Document]) async {
    guard !documents.isEmpty else {
        return
    }
    // A missing "id" used to be force-unwrapped, which crashed the whole process
    // for a single malformed document. Collect only the ids that are present.
    let keys = documents.compactMap { $0.metadata["id"] }
    if keys.count != documents.count {
        print("⚠️ remove_documents: \(documents.count - keys.count) document(s) have no \"id\" metadata and were skipped in docstore")
    }
    if !keys.isEmpty {
        await self.docstore.mdelete(keys: keys)
    }
    // Split each parent on its own, exactly as before, and gather all chunks.
    let all_sub_docs = documents.flatMap { main_doc in
        self.child_splitter.split_documents(documents: [main_doc])
    }
    print("🚀 Begin remove sub document \(all_sub_docs.count), main document \(documents.count)")
    await self.vectorstore.remove_documents(sha256s: all_sub_docs.map { sha256(str: $0.page_content) })
    print("🚀 End remove sub document \(all_sub_docs.count), main document \(documents.count)")
}

/// Returns the SHA-256 digest of the UTF-8 bytes of `str` as a lowercase hex string.
fileprivate func sha256(str: String) -> String {
    let digest = SHA256.hash(data: Data(str.utf8))
    // Every byte becomes exactly two hex characters.
    let hexBytes = digest.map { byte in String(format: "%02x", byte) }
    return hexBytes.joined()
}
// def add_documents(
// self,
Expand Down
2 changes: 1 addition & 1 deletion Sources/LangChain/schema/LocalFileStore.swift
Original file line number Diff line number Diff line change
Expand Up @@ -102,7 +102,7 @@ public class LocalFileStore: BaseStore {
var allKeys: [String] = []
let allSHA = try await objectStore!.readAllKeys(namespace: STORE_NS)
for sha in allSHA {
print("sha: \(sha)")
// print("sha: \(sha)")
if sha == ".DS_Store" {
continue
}
Expand Down
2 changes: 1 addition & 1 deletion Sources/LangChain/utilities/TextSplitter.swift
Original file line number Diff line number Diff line change
Expand Up @@ -92,7 +92,7 @@ public class TextSplitter {
public func split_text(text: String) -> [String] {
[]
}
func split_documents(documents: [Document]) -> [Document] {
public func split_documents(documents: [Document]) -> [Document] {
var new_documents: [Document] = []
for doc in documents {
for chunk in self.split_text(text: doc.page_content){
Expand Down
13 changes: 12 additions & 1 deletion Sources/LangChain/vectorstores/SimilaritySearchKit.swift
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@ import Foundation

#if os(macOS) || os(iOS) || os(visionOS)
import SimilaritySearchKit
import CryptoKit

private struct LangChainEmbeddingBridge: EmbeddingsProtocol {

Expand Down Expand Up @@ -67,13 +68,23 @@ public class SimilaritySearchKit: VectorStore {
}

override func addText(text: String, metadata: [String: String]) async {
await vs.addItem(id: UUID().uuidString, text: text, metadata: metadata)
await vs.addItem(id: sha256(str: text), text: text, metadata: metadata)
}

/// Persists the underlying index by calling `vs.saveIndex()`.
///
/// The method stays non-throwing for existing callers; a failed save is reported
/// on the console instead of being silently discarded.
@available(iOS 16.0, *)
@available(macOS 13.0, *)
public func writeToFile() {
    do {
        _ = try vs.saveIndex()
    } catch {
        print("⚠️ SimilaritySearchKit failed to save index: \(error)")
    }
}

/// Removes the item whose id equals `sha256` from the underlying store `vs`.
///
/// - Parameter sha256: Id of the item to remove. `addText` stores items under
///   `sha256(str: text)`, so this is expected to be the hex digest of the item's text.
override func removeText(sha256: String) async {
vs.removeItem(id: sha256)
}

/// Hashes the UTF-8 encoding of `str` with SHA-256 and returns the digest
/// as a lowercase hexadecimal string (two characters per byte).
func sha256(str: String) -> String {
    let utf8Bytes = Data(str.utf8)
    return SHA256.hash(data: utf8Bytes)
        .map { String(format: "%02x", $0) }
        .joined()
}
}
#endif
17 changes: 10 additions & 7 deletions Sources/LangChain/vectorstores/VectorStore.swift
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,9 @@ public class VectorStore {
func addText(text: String, metadata: [String: String]) async {

}

/// Removes the stored text identified by `sha256`.
/// The base implementation does nothing; subclasses override it to perform the removal.
func removeText(sha256: String) async {

}
func similaritySearch(query: String, k: Int) async -> [MatchedModel] {
[]
}
Expand All @@ -29,6 +31,13 @@ public class VectorStore {
}
}

/// Removes every entry identified by `sha256s` by calling `removeText(sha256:)` once per value.
///
/// Each removal is added as a child task of a task group, and the method returns
/// after the awaited `withTaskGroup` call completes.
///
/// - Parameter sha256s: Identifiers to remove; an empty array adds no tasks.
func remove_documents(sha256s: [String]) async {
await withTaskGroup(of: Void.self) { [self] group in
for sha256 in sha256s {
// One child task per identifier.
group.addTask { await self.removeText(sha256: sha256)}
}
}
}
// def add_documents(self, documents: List[Document], **kwargs: Any) -> List[str]:
// """Run more documents through the embeddings and add to the vectorstore.
//
Expand All @@ -43,9 +52,3 @@ public class VectorStore {
// metadatas = [doc.metadata for doc in documents]
// return self.add_texts(texts, metadatas, **kwargs)
}

public protocol VectorStoreByUser {
func addText(text: String, user_id: String, metadata: [String: String]) async

func similaritySearch(query: String, k: Int, user_id: String) async -> [MatchedModel]
}
8 changes: 8 additions & 0 deletions Tests/LangChainTests/langchain_swiftTests.swift
Original file line number Diff line number Diff line change
Expand Up @@ -519,6 +519,14 @@ Action Input: the input to the action
XCTAssertEqual(keys, ["2"])
}

/// Verifies that `SimilaritySearchKit.sha256(str:)` returns the expected hex digest
/// for a well-known input.
func testSimilaritySearchKitSHA256() throws {
    let vs = SimilaritySearchKit(embeddings: OpenAIEmbeddings())
    let originalString = "Hello, World!"
    let hashedString = vs.sha256(str: originalString)
    print("🚗\(hashedString)")
    // `XCTAssertNotNil` on a non-optional String can never fail, so it verified nothing.
    // Compare against the known SHA-256 digest of the input instead.
    XCTAssertEqual(hashedString, "dffd6021bb2bd5b0af676290809ec3a53191dd81c7f70a4b28688a362182986f")
}


//
// func testYoutubeHackClientList() async throws {
Expand Down

0 comments on commit 8cc143b

Please sign in to comment.