Commit 1d90a8b

buhe committed Nov 18, 2023
2 parents e998953 + 6af24f8
Showing 15 changed files with 1,134 additions and 745 deletions.
4 changes: 2 additions & 2 deletions .github/workflows/swift.yml
@@ -12,9 +12,9 @@ on:
jobs:
build:

-    runs-on: macos-latest
+    runs-on: macos-13

-    steps:
+    steps:
- uses: actions/checkout@v3
- name: Build
run: swift build -v
9 changes: 9 additions & 0 deletions Package.resolved
@@ -81,6 +81,15 @@
"version" : "0.0.2"
}
},
{
"identity" : "similarity-search-kit",
"kind" : "remoteSourceControl",
"location" : "https://github.com/ZachNagengast/similarity-search-kit.git",
"state" : {
"revision" : "6d78d3051aa35f5701af34f9f49ab793a97c6b76",
"version" : "0.0.11"
}
},
{
"identity" : "storage-swift",
"kind" : "remoteSourceControl",
2 changes: 2 additions & 0 deletions Package.swift
@@ -22,6 +22,7 @@ let package = Package(
.package(url: "https://github.com/drmohundro/SWXMLHash", .upToNextMajor(from: "7.0.2")),
.package(url: "https://github.com/scinfu/SwiftSoup", .upToNextMajor(from: "2.6.1")),
.package(url: "https://github.com/juyan/swift-filestore", .upToNextMajor(from: "0.2.0")),
.package(url: "https://github.com/ZachNagengast/similarity-search-kit.git", from: "0.0.11")
],
targets: [
// Targets are the basic building blocks of a package, defining a module or a test suite.
@@ -35,6 +36,7 @@
.product(name: "SWXMLHash", package: "SWXMLHash"),
.product(name: "SwiftSoup", package: "SwiftSoup"),
.product(name: "SwiftFileStore", package: "swift-filestore"),
.product(name: "SimilaritySearchKit", package: "similarity-search-kit"),
]

),
2 changes: 1 addition & 1 deletion Sources/LangChain/document_loaders/BaseLoader.swift
@@ -8,7 +8,7 @@
import Foundation
public struct Document {
public let page_content: String
-    public let metadata: [String: String]
+    public var metadata: [String: String]
}
public class BaseLoader {

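The switch from let to var shown above lets callers attach or update metadata after a Document has been created, for example to carry the doc_id that the new MultiVectorRetriever below looks up. A minimal illustration (the chunk text and id are made up):

// Hypothetical illustration: tag a chunk with the id of its parent document.
var chunk = Document(page_content: "a small chunk of a larger document", metadata: [:])
chunk.metadata["doc_id"] = "doc-1"   // possible now that metadata is declared with var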
47 changes: 47 additions & 0 deletions Sources/LangChain/retriever/MultiVectorRetriever.swift
@@ -0,0 +1,47 @@
//
// File.swift
//
//
// Created by 顾艳华 on 2023/11/17.
//

import Foundation
public class MultiVectorRetriever: BaseRetriever {
let vectorstore: VectorStore
let docstore: BaseStore
let id_key = "doc_id"

public init(vectorstore: VectorStore, docstore: BaseStore) {
self.vectorstore = vectorstore
self.docstore = docstore
}

public override func _get_relevant_documents(query: String) async throws -> [Document] {
let sub_docs = await self.vectorstore.similaritySearch(query: query, k: 1)
var ids: [String] = []
for d in sub_docs {
ids.append(d.metadata[self.id_key]!)
}
let docs = self.docstore.mget(keys: ids)
return docs.map{Document(page_content: $0, metadata: [:])}
}

// def _get_relevant_documents(
// self, query: str, *, run_manager: CallbackManagerForRetrieverRun
// ) -> List[Document]:
// """Get documents relevant to a query.
// Args:
// query: String to find relevant documents for
// run_manager: The callbacks handler to use
// Returns:
// List of relevant documents
// """
// sub_docs = self.vectorstore.similarity_search(query, **self.search_kwargs)
// # We do this to maintain the order of the ids that are returned
// ids = []
// for d in sub_docs:
// if d.metadata[self.id_key] not in ids:
// ids.append(d.metadata[self.id_key])
// docs = self.docstore.mget(ids)
// return [d for d in docs if d is not None]
}
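A minimal usage sketch of the retriever above. chunkStore is a hypothetical stand-in for any concrete VectorStore; only similaritySearch(query:k:) from the code above is assumed. Parent documents live in the docstore under a "doc_id" key, and the chunks embedded in the vectorstore carry the same id in their metadata. Note that mget/mset are not declared public in this commit, so same-module access is assumed.

func retrieveParents(chunkStore: VectorStore) async throws -> [Document] {
    // Parent documents are keyed by "doc_id" in the docstore.
    let parents = InMemoryStore()
    parents.mset(kvpairs: [("doc-1", "Full text of the parent document ...")])

    // Chunks added to chunkStore are expected to carry ["doc_id": "doc-1"] in their
    // metadata, e.g. via the metadata mutation shown for Document above.

    let retriever = MultiVectorRetriever(vectorstore: chunkStore, docstore: parents)
    // Chunks matched by the query are mapped back to their parent documents.
    return try await retriever._get_relevant_documents(query: "what is the parent about?")
}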
25 changes: 25 additions & 0 deletions Sources/LangChain/schema/BaseStore.swift
@@ -0,0 +1,25 @@
//
// File.swift
//
//
// Created by 顾艳华 on 2023/11/17.
//

import Foundation
public class BaseStore {
func mget(keys: [String]) -> [String] {
[]
}

func mset(kvpairs: [(String, String)]) {

}

func mdelete(keys: [String]) {

}

func keys(prefix: String? = nil) -> [String] {
[]
}
}
50 changes: 50 additions & 0 deletions Sources/LangChain/schema/InMemoryStore.swift
@@ -0,0 +1,50 @@
//
// File.swift
//
//
// Created by 顾艳华 on 2023/11/17.
//

import Foundation
public class InMemoryStore: BaseStore {
var store:[String: String] = [:]
public override init() {
super.init()
}
override func mget(keys: [String]) -> [String] {
var values: [String] = []
for k in keys {
let v = self.store[k]
if v != nil {
values.append(v!)
}
}
return values
}

override func mset(kvpairs: [(String, String)]) {
for kv in kvpairs {
self.store[kv.0] = kv.1
}
}

override func mdelete(keys: [String]) {
for k in keys {
self.store.removeValue(forKey: k)
}
}

override func keys(prefix: String? = nil) -> [String] {
if prefix == nil {
return Array(self.store.keys)
} else {
var matched: [String] = []
for k in self.store.keys {
if k.hasPrefix(prefix!) {
matched.append(k)
}
}
return matched
}
}
}
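A quick sketch of the in-memory store's behaviour (same-module access assumed, since the methods are not declared public in this commit):

let store = InMemoryStore()
store.mset(kvpairs: [("a:1", "alpha"), ("a:2", "beta"), ("b:1", "gamma")])
let fetched = store.mget(keys: ["a:1", "missing"])   // ["alpha"]: missing keys are silently skipped
let aKeys = store.keys(prefix: "a:")                 // ["a:1", "a:2"] (dictionary order, so unordered)
store.mdelete(keys: ["a:1"])
let remaining = store.keys()                         // the two remaining keys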
143 changes: 139 additions & 4 deletions Sources/LangChain/utilities/TextSplitter.swift
@@ -14,10 +14,11 @@ public class TextSplitter {
self._chunk_size = chunk_size
self._chunk_overlap = chunk_overlap
}
-    func _split_text_with_regex(text: String) -> [String] {
-        text.components(separatedBy: "\n\n")
+    func _split_text_with_regex(text: String, separater: String) -> [String] {
+        text.components(separatedBy: separater)
}


func _join_docs(docs: [String]) -> String {
let text = docs.joined()
return text
@@ -63,18 +64,152 @@
docs.append(doc)
return docs
}
//
// def create_documents(
// self, texts: List[str], metadatas: Optional[List[dict]] = None
// ) -> List[Document]:
// """Create documents from a list of texts."""
// _metadatas = metadatas or [{}] * len(texts)
// documents = []
// for i, text in enumerate(texts):
// index = -1
// for chunk in self.split_text(text):
// metadata = copy.deepcopy(_metadatas[i])
// if self._add_start_index:
// index = text.find(chunk, index + 1)
// metadata["start_index"] = index
// new_doc = Document(page_content=chunk, metadata=metadata)
// documents.append(new_doc)
// return documents
//
// def split_documents(self, documents: Iterable[Document]) -> List[Document]:
// """Split documents."""
// texts, metadatas = [], []
// for doc in documents:
// texts.append(doc.page_content)
// metadatas.append(doc.metadata)
// return self.create_documents(texts, metadatas=metadatas)
public func split_text(text: String) -> [String] {
[]
}
func split_documents(documents: [Document]) -> [Document] {
var new_documents: [Document] = []
for doc in documents {
for chunk in self.split_text(text: doc.page_content){
let new_doc = Document(page_content: chunk, metadata: [:])
new_documents.append(new_doc)
}
}
return new_documents
}
}

public class CharacterTextSplitter: TextSplitter {
public override init(chunk_size: Int, chunk_overlap: Int) {
super.init(chunk_size: chunk_size, chunk_overlap: chunk_overlap)
}
-    public func split_text(text: String) -> [String] {
-        let splits = _split_text_with_regex(text: text)
+    public override func split_text(text: String) -> [String] {
+        let splits = _split_text_with_regex(text: text, separater: "\n\n")
// _separator = "" if self._keep_separator else self._separator
return self._merge_splits(splits: splits)
}
}

public class RecursiveCharacterTextSplitter: TextSplitter {
// def _split_text(self, text: str, separators: List[str]) -> List[str]:
// """Split incoming text and return chunks."""
// final_chunks = []
// # Get appropriate separator to use
// separator = separators[-1]
// new_separators = []
// for i, _s in enumerate(separators):
// _separator = _s if self._is_separator_regex else re.escape(_s)
// if _s == "":
// separator = _s
// break
// if re.search(_separator, text):
// separator = _s
// new_separators = separators[i + 1 :]
// break
//
// _separator = separator if self._is_separator_regex else re.escape(separator)
// splits = _split_text_with_regex(text, _separator, self._keep_separator)
//
// # Now go merging things, recursively splitting longer texts.
// _good_splits = []
// _separator = "" if self._keep_separator else separator
// for s in splits:
// if self._length_function(s) < self._chunk_size:
// _good_splits.append(s)
// else:
// if _good_splits:
// merged_text = self._merge_splits(_good_splits, _separator)
// final_chunks.extend(merged_text)
// _good_splits = []
// if not new_separators:
// final_chunks.append(s)
// else:
// other_info = self._split_text(s, new_separators)
// final_chunks.extend(other_info)
// if _good_splits:
// merged_text = self._merge_splits(_good_splits, _separator)
// final_chunks.extend(merged_text)
// return final_chunks
public override init(chunk_size: Int, chunk_overlap: Int) {
super.init(chunk_size: chunk_size, chunk_overlap: chunk_overlap)
}
public override func split_text(text: String) -> [String] {
return self._split_text(text: text, separators: ["\n\n", "\n", " ", ""])
}

func _split_text(text: String, separators: [String]) -> [String] {
//Split incoming text and return chunks.
var final_chunks: [String] = []
// Get appropriate separator to use
var separator = separators.last!
var new_separators: [String] = []
for i in 0..<separators.count {
let _s = separators[i]
if _s == "" {
separator = _s
break
}
if text.contains(_s) {
separator = _s
new_separators = Array(separators[i + 1 ..< separators.count])
break
}
}
// _separator = separator
let splits = _split_text_with_regex(text: text, separater: separator)

// Now go merging things, recursively splitting longer texts.
var _good_splits: [String] = []
// _separator = "" if self._keep_separator else separator
for s in splits {
if s.count < self._chunk_size {
_good_splits.append(s)
} else {
if !_good_splits.isEmpty {
let merged_text = self._merge_splits(splits: _good_splits)
final_chunks.append(contentsOf: merged_text)
_good_splits = []
}
if new_separators.isEmpty {
final_chunks.append(s)
} else {
let other_info = self._split_text(text: s, separators: new_separators)
final_chunks.append(contentsOf: other_info)
}
}
}
if !_good_splits.isEmpty {
let merged_text = self._merge_splits(splits: _good_splits)
final_chunks.append(contentsOf: merged_text)
}
return final_chunks
}
}
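A short usage sketch for the two splitters above. The chunk sizes are illustrative, and the sketch sticks to split_text because split_documents is not declared public in this commit.

let text = """
First paragraph of a long document.

Second paragraph, which keeps going and going and eventually needs to be broken up.
"""

// CharacterTextSplitter splits on "\n\n" only, then merges pieces back up toward chunk_size.
let charSplitter = CharacterTextSplitter(chunk_size: 200, chunk_overlap: 20)
let paragraphChunks = charSplitter.split_text(text: text)

// RecursiveCharacterTextSplitter falls back through "\n\n", "\n", " ", "" until pieces fit under chunk_size.
let recursiveSplitter = RecursiveCharacterTextSplitter(chunk_size: 200, chunk_overlap: 20)
let recursiveChunks = recursiveSplitter.split_text(text: text)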
//class BaseDocumentTransformer(ABC):
// """Base interface for transforming documents."""
//