Skip to content

Commit

Permalink
Merge branch 'main' into local
Browse files Browse the repository at this point in the history
  • Loading branch information
buhe committed Feb 18, 2024
2 parents 6113b4a + c8da24f commit d85c53a
Show file tree
Hide file tree
Showing 14 changed files with 200 additions and 43 deletions.
27 changes: 18 additions & 9 deletions Package.resolved
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,15 @@
"version" : "1.20.1"
}
},
{
"identity" : "feedkit",
"kind" : "remoteSourceControl",
"location" : "https://github.com/nmdias/FeedKit",
"state" : {
"revision" : "68493a33d862c33c9a9f67ec729b3b7df1b20ade",
"version" : "9.1.2"
}
},
{
"identity" : "functions-swift",
"kind" : "remoteSourceControl",
Expand Down Expand Up @@ -77,8 +86,8 @@
"kind" : "remoteSourceControl",
"location" : "https://github.com/buhe/openai-kit",
"state" : {
"revision" : "ead156fa2e25e4e469cd837fda08836fb880a834",
"version" : "1.8.3"
"revision" : "f0c785d22dd14f71a42ad83cedbbddc02591aadb",
"version" : "1.8.5"
}
},
{
Expand All @@ -102,10 +111,10 @@
{
"identity" : "similarity-search-kit",
"kind" : "remoteSourceControl",
"location" : "https://github.com/ZachNagengast/similarity-search-kit.git",
"location" : "https://github.com/buhe/similarity-search-kit",
"state" : {
"revision" : "6d78d3051aa35f5701af34f9f49ab793a97c6b76",
"version" : "0.0.11"
"revision" : "39bbd0b5c2662728792ee9be132212e5fd4a49e2",
"version" : "0.0.16"
}
},
{
Expand Down Expand Up @@ -149,8 +158,8 @@
"kind" : "remoteSourceControl",
"location" : "https://github.com/apple/swift-collections.git",
"state" : {
"revision" : "d029d9d39c87bed85b1c50adee7c41795261a192",
"version" : "1.0.6"
"revision" : "94cf62b3ba8d4bed62680a282d4c25f9c63c2efb",
"version" : "1.1.0"
}
},
{
Expand Down Expand Up @@ -266,8 +275,8 @@
"kind" : "remoteSourceControl",
"location" : "https://github.com/buhe/SwiftyNotion",
"state" : {
"revision" : "8ff884579370c0ebbc81ae1d6b8e9b62076d25b7",
"version" : "0.1.2"
"revision" : "61af40905bfd5eb8cad454e02095dfbdea0498cb",
"version" : "0.1.3"
}
},
{
Expand Down
9 changes: 6 additions & 3 deletions Package.swift
Original file line number Diff line number Diff line change
Expand Up @@ -18,16 +18,17 @@ let package = Package(
targets: ["LangChain"]),
],
dependencies: [
.package(url: "https://github.com/buhe/openai-kit", .upToNextMajor(from: "1.8.3")),
.package(url: "https://github.com/buhe/openai-kit", .upToNextMajor(from: "1.8.5")),
.package(url: "https://github.com/supabase-community/supabase-swift", .upToNextMajor(from: "0.2.1")),
.package(url: "https://github.com/SwiftyJSON/SwiftyJSON", .upToNextMajor(from: "5.0.1")),
.package(url: "https://github.com/drmohundro/SWXMLHash", .upToNextMajor(from: "7.0.2")),
.package(url: "https://github.com/scinfu/SwiftSoup", .upToNextMajor(from: "2.6.1")),
.package(url: "https://github.com/juyan/swift-filestore", .upToNextMajor(from: "0.5.0")),
.package(url: "https://github.com/ZachNagengast/similarity-search-kit.git", from: "0.0.11"),
.package(url: "https://github.com/buhe/similarity-search-kit", from: "0.0.16"),
.package(url: "https://github.com/google/generative-ai-swift", .upToNextMajor(from: "0.4.4")),
.package(url: "https://github.com/buhe/SwiftyNotion", .upToNextMajor(from: "0.1.2")),
.package(url: "https://github.com/buhe/llmfarm_core.swift", .branch("langchain")),
.package(url: "https://github.com/buhe/SwiftyNotion", .upToNextMajor(from: "0.1.3")),
.package(url: "https://github.com/nmdias/FeedKit", .upToNextMajor(from: "9.1.2")),
],
targets: [
// Targets are the basic building blocks of a package, defining a module or a test suite.
Expand All @@ -42,9 +43,11 @@ let package = Package(
.product(name: "SwiftSoup", package: "SwiftSoup"),
.product(name: "SwiftFileStore", package: "swift-filestore"),
.product(name: "SimilaritySearchKit", package: "similarity-search-kit", condition: .when(platforms: [.macOS, .iOS, .visionOS])),
// .product(name: "SimilaritySearchKitDistilbert", package: "similarity-search-kit", condition: .when(platforms: [.macOS, .iOS, .visionOS])),
.product(name: "GoogleGenerativeAI", package: "generative-ai-swift"),
.product(name: "SwiftyNotion", package: "SwiftyNotion"),
.product(name: "llmfarm_core", package: "llmfarm_core.swift"),
.product(name: "FeedKit", package: "FeedKit"),
]

),
Expand Down
2 changes: 2 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -402,6 +402,7 @@ Task(priority: .background) {
- [x] FileStore
- Embedding
- [x] OpenAI
- [ ] Distilbert
- Chain
- [x] Base
- [x] LLM
Expand Down Expand Up @@ -441,6 +442,7 @@ Task(priority: .background) {
- [x] ImageOCRLoader
- [x] AudioLoader
- [x] NotionLoader
- [x] RSSLoader
- OutputParser
- [x] MRKLOutputParser
- [x] ListOutputParser
Expand Down
9 changes: 8 additions & 1 deletion Sources/LangChain/document_loaders/BaseLoader.swift
Original file line number Diff line number Diff line change
Expand Up @@ -6,9 +6,16 @@
//

import Foundation
public struct Document {
/// A piece of text together with string-keyed metadata describing it.
public struct Document: Equatable {
    /// The text carried by this document.
    public let page_content: String
    /// Arbitrary string key/value pairs attached to the document; mutable after creation.
    public var metadata: [String: String]

    /// Creates a document from its text and metadata.
    public init(page_content: String, metadata: [String : String]) {
        self.metadata = metadata
        self.page_content = page_content
    }

    /// Two documents compare equal when their `page_content` matches;
    /// `metadata` does not take part in the comparison.
    public static func == (lhs: Document, rhs: Document) -> Bool {
        lhs.page_content == rhs.page_content
    }
}
public class BaseLoader {

Expand Down
54 changes: 54 additions & 0 deletions Sources/LangChain/document_loaders/RSSLoader.swift
Original file line number Diff line number Diff line change
@@ -0,0 +1,54 @@
//
// RSSLoader.swift
//
//
// Created by 顾艳华 on 2/10/24.
//

import Foundation
import FeedKit

/// Loads the entries of an RSS, Atom or JSON feed as documents.
///
/// Every feed entry becomes one `Document` whose `page_content` is the entry's
/// title (an empty string when the entry has no title) and whose metadata is empty.
public class RSSLoader: BaseLoader {
    /// Address of the feed, kept as the string supplied by the caller.
    let url: String

    /// - Parameters:
    ///   - url: Address of the feed to load.
    ///   - callbacks: Handlers forwarded to `BaseLoader`.
    public init(url: String, callbacks: [BaseCallbackHandler] = []) {
        self.url = url
        super.init(callbacks: callbacks)
    }

    /// Parses the feed at `url` and returns one document per entry.
    ///
    /// - Returns: The documents built from the feed's entries, or an empty array
    ///   when the parser reports a failure (the error is printed).
    /// - Throws: `URLError(.badURL)` when `url` is not a valid URL string.
    public override func _load() async throws -> [Document] {
        // A malformed string used to crash through a force-unwrap; surface it as an error instead.
        guard let feedURL = URL(string: url) else {
            throw URLError(.badURL)
        }

        switch FeedParser(URL: feedURL).parse() {
        case .success(let feed):
            return RSSLoader.titles(of: feed).map { title in
                Document(page_content: title ?? "", metadata: [:])
            }
        case .failure(let error):
            // Best-effort: a feed that cannot be parsed yields no documents.
            print(error)
            return []
        }
    }

    /// Extracts the (optional) title of every entry, whatever the feed format.
    private static func titles(of feed: Feed) -> [String?] {
        switch feed {
        case let .atom(atom):
            return (atom.entries ?? []).map { $0.title }
        case let .rss(rss):
            return (rss.items ?? []).map { $0.title }
        case let .json(json):
            return (json.items ?? []).map { $0.title }
        }
    }
}
22 changes: 22 additions & 0 deletions Sources/LangChain/embeddings/Distilbert.swift
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
////
//// Distilbert.swift
////
////
//// Created by 顾艳华 on 2/11/24.
////
//
//import Foundation
//import SimilaritySearchKitDistilbert
//
//@available(macOS 13.0, *)
//public struct Distilbert: Embeddings {
// let n = DistilbertEmbeddings()
// public init() {
//
// }
//
//
// public func embedQuery(text: String) async -> [Float] {
// await n.encode(sentence: text)!
// }
//}
14 changes: 5 additions & 9 deletions Sources/LangChain/embeddings/OpenAIEmbeddings.swift
Original file line number Diff line number Diff line change
Expand Up @@ -11,30 +11,26 @@ import AsyncHTTPClient
import OpenAIKit

public struct OpenAIEmbeddings: Embeddings {
public init() {

let session: URLSession
public init(session: URLSession = URLSession(configuration: .default)) {
self.session = session
}

// public func embedDocuments(texts: [String]) -> [[Float]] {
// []
// }

public func embedQuery(text: String) async -> [Float] {
let eventLoopGroup = ThreadManager.thread

let env = Env.loadEnv()

if let apiKey = env["OPENAI_API_KEY"] {
let baseUrl = env["OPENAI_API_BASE"] ?? "api.openai.com"

let httpClient = HTTPClient(eventLoopGroupProvider: .shared(eventLoopGroup))
let configuration = Configuration(apiKey: apiKey, api: API(scheme: .https, host: baseUrl))

let openAIClient = OpenAIKit.Client(httpClient: httpClient, configuration: configuration)
defer {
// it's important to shutdown the httpClient after all requests are done, even if one failed. See: https://github.com/swift-server/async-http-client
try? httpClient.syncShutdown()
}
let openAIClient = OpenAIKit.Client(session: session, configuration: configuration)

do {
let embedding = try await openAIClient.embeddings.create(input: text)

Expand Down
2 changes: 1 addition & 1 deletion Sources/LangChain/retriever/MultiVectorRetriever.swift
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@ public class MultiVectorRetriever: BaseRetriever {
}

public override func _get_relevant_documents(query: String) async throws -> [Document] {
let sub_docs = await self.vectorstore.similaritySearch(query: query, k: 1)
let sub_docs = await self.vectorstore.similaritySearch(query: query, k: 2)
var ids: [String] = []
for d in sub_docs {
ids.append(d.metadata[self.id_key]!)
Expand Down
30 changes: 28 additions & 2 deletions Sources/LangChain/retriever/ParentDocumentRetriever.swift
Original file line number Diff line number Diff line change
Expand Up @@ -4,8 +4,9 @@
//
// Created by 顾艳华 on 2023/11/17.
//

import CryptoKit
import Foundation

public class ParentDocumentRetriever: MultiVectorRetriever {
public init(child_splitter: TextSplitter, parent_splitter: TextSplitter? = nil, vectorstore: VectorStore, docstore: BaseStore) {
self.child_splitter = child_splitter
Expand All @@ -19,7 +20,10 @@ public class ParentDocumentRetriever: MultiVectorRetriever {
let parent_splitter: TextSplitter?
//The text splitter to use to create parent documents.
//If none, then the parent documents will be the raw documents passed in.
public func add_documents(documents: [Document]) async {
public func add_documents(documents: [Document]) async -> [String] {
if documents.isEmpty {
return []
}
var parent_documents: [Document]
if let p = self.parent_splitter {
parent_documents = p.split_documents(documents: documents)
Expand All @@ -42,6 +46,28 @@ public class ParentDocumentRetriever: MultiVectorRetriever {
await self.vectorstore.add_documents(documents: docs)
await self.docstore.mset(kvpairs: full_docs)
print("🚀 End add sub document \(docs.count), main document \(full_docs.count)")
return doc_ids
}

/// Removes the given parent documents from the doc store and their child
/// chunks from the vector store.
///
/// Parent documents are deleted from `docstore` by their `"id"` metadata entry.
/// Child chunks are recomputed with `child_splitter` and removed from
/// `vectorstore` by the SHA-256 digest of each chunk's `page_content`.
///
/// - Parameter documents: Parent documents to remove. Does nothing when empty.
public func remove_documents(documents: [Document]) async {
    if documents.isEmpty {
        return
    }
    // A document without an "id" entry used to crash through a force-unwrap;
    // it now simply has no doc-store key to delete.
    let parent_ids = documents.compactMap { $0.metadata["id"] }
    await self.docstore.mdelete(keys: parent_ids)

    // Split each parent on its own, in order, and collect the resulting chunks.
    let all_sub_docs = documents.flatMap { main_doc in
        self.child_splitter.split_documents(documents: [main_doc])
    }
    print("🚀 Begin remove sub document \(all_sub_docs.count), main document \(documents.count)")
    await self.vectorstore.remove_documents(sha256s: all_sub_docs.map { sha256(str: $0.page_content) })
    print("🚀 End remove sub document \(all_sub_docs.count), main document \(documents.count)")
}

/// Returns the SHA-256 digest of the UTF-8 bytes of `str` as lowercase hexadecimal.
fileprivate func sha256(str: String) -> String {
    var hex = ""
    // Each digest byte is rendered as exactly two hex characters.
    for byte in SHA256.hash(data: Data(str.utf8)) {
        hex += String(format: "%02x", byte)
    }
    return hex
}
// def add_documents(
// self,
Expand Down
2 changes: 1 addition & 1 deletion Sources/LangChain/schema/LocalFileStore.swift
Original file line number Diff line number Diff line change
Expand Up @@ -102,7 +102,7 @@ public class LocalFileStore: BaseStore {
var allKeys: [String] = []
let allSHA = try await objectStore!.readAllKeys(namespace: STORE_NS)
for sha in allSHA {
print("sha: \(sha)")
// print("sha: \(sha)")
if sha == ".DS_Store" {
continue
}
Expand Down
2 changes: 1 addition & 1 deletion Sources/LangChain/utilities/TextSplitter.swift
Original file line number Diff line number Diff line change
Expand Up @@ -92,7 +92,7 @@ public class TextSplitter {
public func split_text(text: String) -> [String] {
[]
}
func split_documents(documents: [Document]) -> [Document] {
public func split_documents(documents: [Document]) -> [Document] {
var new_documents: [Document] = []
for doc in documents {
for chunk in self.split_text(text: doc.page_content){
Expand Down
42 changes: 37 additions & 5 deletions Sources/LangChain/vectorstores/SimilaritySearchKit.swift
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@ import Foundation

#if os(macOS) || os(iOS) || os(visionOS)
import SimilaritySearchKit
import CryptoKit

private struct LangChainEmbeddingBridge: EmbeddingsProtocol {

Expand All @@ -32,27 +33,58 @@ private struct LangChainEmbeddingBridge: EmbeddingsProtocol {
}
let embeddings: Embeddings
func encode(sentence: String) async -> [Float]? {
await embeddings.embedQuery(text: sentence)
let e = await embeddings.embedQuery(text: sentence)
if e.isEmpty {
print("⚠️\(sentence.prefix(100))")
}
return e
}


}
public class SimilaritySearchKit: VectorStore {
let vs: SimilarityIndex

public init(embeddings: Embeddings) async {
self.vs = await SimilarityIndex(
public init(embeddings: Embeddings, autoLoad: Bool = false) {
self.vs = SimilarityIndex(
model: LangChainEmbeddingBridge(embeddings: embeddings),
metric: CosineSimilarity()
metric: DotProduct()
)
if #available(macOS 13.0, *) {
if #available(iOS 16.0, *) {
if autoLoad {
let _ = try? vs.loadIndex()
} else {
// Fallback on earlier versions
}
}
} else {
// Fallback on earlier versions
}
}

/// Searches the underlying index for `query` and returns the top `k` hits,
/// each converted to a `MatchedModel` carrying its text, score and metadata.
override func similaritySearch(query: String, k: Int) async -> [MatchedModel] {
    let hits = await vs.search(query, top: k)
    return hits.map { hit in
        MatchedModel(content: hit.text, similarity: hit.score, metadata: hit.metadata)
    }
}

override func addText(text: String, metadata: [String: String]) async {
await vs.addItem(id: UUID().uuidString, text: text, metadata: metadata)
await vs.addItem(id: sha256(str: text), text: text, metadata: metadata)
}

/// Saves the underlying index by calling its `saveIndex()`.
///
/// A failure does not propagate to the caller; it is logged so that a failed
/// save is no longer silent.
@available(iOS 16.0, *)
@available(macOS 13.0, *)
public func writeToFile() {
    do {
        _ = try vs.saveIndex()
    } catch {
        print("⚠️ Failed to save similarity index: \(error)")
    }
}

/// Removes the item stored under the id `sha256` from the underlying index.
///
/// - Parameter sha256: Id of the item to remove — presumably the digest
///   produced by `sha256(str:)` for the item's text; confirm against `addText`.
override func removeText(sha256: String) async {
vs.removeItem(id: sha256)
}

/// Hashes the UTF-8 bytes of `str` with SHA-256 and returns the digest as
/// a lowercase hexadecimal string.
func sha256(str: String) -> String {
    let digest = SHA256.hash(data: Data(str.utf8))
    return digest.reduce(into: "") { hex, byte in
        hex += String(format: "%02x", byte)
    }
}
}
#endif
Loading

0 comments on commit d85c53a

Please sign in to comment.