Skip to content

Commit

Permalink
expand matrix
Browse files Browse the repository at this point in the history
  • Loading branch information
ZanSara committed Feb 14, 2024
1 parent 7f5ed82 commit bbbf08c
Show file tree
Hide file tree
Showing 2 changed files with 44 additions and 15 deletions.
5 changes: 2 additions & 3 deletions .github/workflows/mongodb_atlas.yml
Original file line number Diff line number Diff line change
Expand Up @@ -30,9 +30,8 @@ jobs:
strategy:
fail-fast: false
matrix:
# MongoDB Atlas tests run against a live instance, so we should avoid overwhelming it
os: [ubuntu-latest]
python-version: ['3.10']
os: [ubuntu-latest, windows-latest, macos-latest]
python-version: ['3.9', '3.10', '3.11']

steps:
- uses: actions/checkout@v4
Expand Down
54 changes: 42 additions & 12 deletions integrations/mongodb_atlas/examples/example.py
Original file line number Diff line number Diff line change
@@ -1,12 +1,42 @@
from pymongo.mongo_client import MongoClient
from pymongo.server_api import ServerApi

# Ask for the connection string at runtime: a URI with an embedded username and
# password must never be committed to source control.
uri = input("Enter your MongoDB Atlas connection string: ")

# Create a new client and connect to the server
client = MongoClient(uri, server_api=ServerApi("1"))

# Send a ping to confirm a successful connection
try:
    client.admin.command("ping")
    print("Pinged your deployment. You successfully connected to MongoDB!")
except Exception as e:
    # This script is only a connectivity check, so report the failure reason
    # instead of letting the traceback escape.
    print(e)
# Install required packages for this example, including mongodb-atlas-haystack and other libraries needed
# for Markdown conversion and embeddings generation. Use the following command:
#
# pip install mongodb-atlas-haystack markdown-it-py mdit_plain "sentence-transformers>=2.2.0"
#
# Download some Markdown files to index.
# git clone https://github.com/anakin87/neural-search-pills

import glob
from haystack import Pipeline
from haystack.components.converters import MarkdownToDocument
from haystack.components.embedders import SentenceTransformersDocumentEmbedder, SentenceTransformersTextEmbedder
from haystack.components.preprocessors import DocumentSplitter
from haystack.components.writers import DocumentWriter
from haystack_integrations.document_stores.mongodb_atlas import MongoDBAtlasDocumentStore

# Provide your connection string
connection_string = input("Enter your MongoDB Atlas connection string: ")

# Initialize the document store
document_store = MongoDBAtlasDocumentStore(
    mongo_connection_string=connection_string,
    database_name="haystack_test",
    collection_name="test_collection",
)

# Collect the Markdown files to index (see the `git clone` step in the header comment)
file_paths = glob.glob("neural-search-pills/pills/*.md")

# Create the indexing Pipeline: convert -> split -> embed -> write
indexing = Pipeline()
indexing.add_component("converter", MarkdownToDocument())
indexing.add_component("splitter", DocumentSplitter(split_by="sentence", split_length=2))
indexing.add_component("embedder", SentenceTransformersDocumentEmbedder())
indexing.add_component("writer", DocumentWriter(document_store))
indexing.connect("converter", "splitter")
indexing.connect("splitter", "embedder")
indexing.connect("embedder", "writer")

# Index the documents
indexing.run({"converter": {"sources": file_paths}})

# Report what was stored. count_documents() returns an int and filter_documents()
# returns Document objects, so neither can be concatenated or joined as a str
# directly: format the count and join each document's text content instead.
indexed_contents = "".join(f"\n - {doc.content}" for doc in document_store.filter_documents())
print(f"Indexed documents: {document_store.count_documents()}{indexed_contents}")

0 comments on commit bbbf08c

Please sign in to comment.