Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

misc. MontyStore improvements #827

Merged
merged 3 commits into from
Jul 21, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@ aioitertools==0.10.0
pydantic==1.10.2
fastapi==0.79.0
numpy==1.23.0;python_version>"3.8"
typing_extensions;python_version<"3.8"
pyzmq==24.0.1
dnspython==2.2.1
uvicorn==0.18.3
Expand Down
51 changes: 34 additions & 17 deletions src/maggma/stores/mongolike.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,22 @@
from itertools import chain, groupby
from socket import socket
import warnings
from typing import Dict, Iterator, List, Optional, Tuple, Union, Any, Callable

try:
from typing import (
Dict,
Iterator,
List,
Optional,
Tuple,
Union,
Any,
Callable,
Literal,
)
except ImportError:
from typing import Dict, Iterator, List, Optional, Tuple, Union, Any, Callable
from typing_extensions import Literal

import mongomock
import orjson
Expand All @@ -27,9 +42,9 @@
from maggma.utils import confirm_field_index, to_dt

try:
import montydb # type: ignore
from montydb import MontyClient, set_storage # type: ignore
except ImportError:
montydb = None
MontyClient = None


class SSHTunnel(MSONable):
Expand Down Expand Up @@ -872,7 +887,7 @@ def __eq__(self, other: object) -> bool:


@requires(
montydb,
MontyClient is not None,
"MontyStore requires MontyDB to be installed. See the MontyDB repository for more "
"information: https://github.com/davidlatwe/montydb",
)
Expand Down Expand Up @@ -903,7 +918,7 @@ def __init__(
collection_name,
database_path: str = None,
database_name: str = "db",
storage: str = "sqlite",
storage: Literal["sqlite", "flatfile", "lightning"] = "sqlite",
storage_kwargs: Optional[dict] = None,
client_kwargs: Optional[dict] = None,
**kwargs,
Expand All @@ -913,10 +928,12 @@ def __init__(

Args:
collection_name: Name for the collection.
database_path: Path to the directory containing the on-disk database files.
If None, the current working directory will be used.
database_path: Path to on-disk database files. If None, the current working
directory will be used.
database_name: The database name.
storage: The storage type. Options include "sqlite", "lightning", "flatfile".
storage: The storage type. Options include "sqlite", "lightning", "flatfile". Note that
although MontyDB supports in-memory storage, this capability is disabled in maggma to avoid unintended behavior, since multiple
in-memory MontyStore instances would actually point to the same data.
storage_kwargs: Keyword arguments passed to ``montydb.set_storage``.
client_kwargs: Keyword arguments passed to the ``montydb.MontyClient``
constructor.
Expand All @@ -925,7 +942,6 @@ def __init__(
if database_path is None:
database_path = str(Path.cwd())

self.database = "MontyDB"
self.database_path = database_path
self.database_name = database_name
self.collection_name = collection_name
Expand All @@ -935,8 +951,8 @@ def __init__(
self.kwargs = kwargs
self.storage = storage
self.storage_kwargs = storage_kwargs or {
"use_bson": True,
"monty_version": "4.0",
"use_bson": True, # import pymongo's BSON; do not use montydb's
"mongo_version": "4.0",
}
self.client_kwargs = client_kwargs or {}
super(MongoStore, self).__init__(**kwargs) # noqa
Expand All @@ -948,17 +964,19 @@ def connect(self, force_reset: bool = False):
Args:
force_reset: Force connection reset.
"""
from montydb import set_storage, MontyClient # type: ignore

set_storage(self.database_path, storage=self.storage, **self.storage_kwargs)
# TODO - workaround: skip set_storage for the ":memory:" path; may be obviated by a future montydb update
if self.database_path != ":memory:":
set_storage(self.database_path, storage=self.storage, **self.storage_kwargs)
client = MontyClient(self.database_path, **self.client_kwargs)
if not self._coll or force_reset:
self._coll = client["db"][self.collection_name]
self._coll = client[self.database_name][self.collection_name]

@property
def name(self) -> str:
"""Return a string representing this data source."""
return f"monty://{self.database_path}/{self.database}/{self.collection_name}"
return (
f"monty://{self.database_path}/{self.database_name}/{self.collection_name}"
)

def count(
self,
Expand All @@ -973,7 +991,6 @@ def count(
hint: Dictionary of indexes to use as hints for query optimizer.
Keys are field names and values are 1 for ascending or -1 for descending.
"""

criteria = criteria if criteria else {}

hint_list = (
Expand Down
23 changes: 15 additions & 8 deletions tests/stores/test_mongolike.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,8 +12,7 @@
from pymongo.errors import ConfigurationError, DocumentTooLarge, OperationFailure

from maggma.core import StoreError
from maggma.stores import JSONStore, MemoryStore, MongoStore, MongoURIStore
from maggma.stores.mongolike import MontyStore
from maggma.stores import JSONStore, MemoryStore, MongoStore, MongoURIStore, MontyStore
from maggma.validators import JSONSchemaValidator


Expand Down Expand Up @@ -304,6 +303,14 @@ def test_monty_store_connect(tmp_dir):
assert montystore._collection is not None
assert montystore.name is not None

# check that the kwargs work
with ScratchDir("."):
store = MontyStore("my_results", database_name="NotNamedDB")
store.connect()
store.update({"test": {"cow": "moo"}}, key="test")
store.close()
assert Path("NotNamedDB/my_results.collection").exists()


def test_monty_store_groupby(montystore):
montystore.update(
Expand Down Expand Up @@ -338,23 +345,23 @@ def test_monty_store_groupby(montystore):
assert len(data) == 2


def test_montystore_query(montystore):
def test_monty_store_query(montystore):
montystore._collection.insert_one({"a": 1, "b": 2, "c": 3})
assert montystore.query_one(properties=["a"])["a"] == 1
assert montystore.query_one(properties=["a"])["a"] == 1
assert montystore.query_one(properties=["b"])["b"] == 2
assert montystore.query_one(properties=["c"])["c"] == 3


def test_montystore_count(montystore):
def test_monty_store_count(montystore):
montystore._collection.insert_one({"a": 1, "b": 2, "c": 3})
assert montystore.count() == 1
montystore._collection.insert_one({"aa": 1, "b": 2, "c": 3})
assert montystore.count() == 2
assert montystore.count({"a": 1}) == 1


def test_montystore_distinct(montystore):
def test_monty_store_distinct(montystore):
montystore._collection.insert_one({"a": 1, "b": 2, "c": 3})
montystore._collection.insert_one({"a": 4, "d": 5, "e": 6, "g": {"h": 1}})
assert set(montystore.distinct("a")) == {1, 4}
Expand All @@ -375,7 +382,7 @@ def test_montystore_distinct(montystore):
assert montystore.distinct("i") == [None]


def test_montystore_update(montystore):
def test_monty_store_update(montystore):
montystore.update({"e": 6, "d": 4}, key="e")
assert (
montystore.query_one(criteria={"d": {"$exists": 1}}, properties=["d"])["d"] == 4
Expand All @@ -399,15 +406,15 @@ def test_montystore_update(montystore):
montystore.update({"e": "abc", "d": 3}, key="e")


def test_montystore_remove_docs(montystore):
def test_monty_store_remove_docs(montystore):
montystore._collection.insert_one({"a": 1, "b": 2, "c": 3})
montystore._collection.insert_one({"a": 4, "d": 5, "e": 6, "g": {"h": 1}})
montystore.remove_docs({"a": 1})
assert len(list(montystore.query({"a": 4}))) == 1
assert len(list(montystore.query({"a": 1}))) == 0


def test_montystore_last_updated(montystore):
def test_monty_store_last_updated(montystore):
assert montystore.last_updated == datetime.min
start_time = datetime.utcnow()
montystore._collection.insert_one({montystore.key: 1, "a": 1})
Expand Down