Skip to content

Commit

Permalink
better logging and rough cut at testing
Browse files Browse the repository at this point in the history
  • Loading branch information
rkuo-danswer committed Nov 15, 2024
1 parent bdc5971 commit 2a492c3
Show file tree
Hide file tree
Showing 2 changed files with 65 additions and 8 deletions.
20 changes: 12 additions & 8 deletions backend/danswer/indexing/indexing_pipeline.py
Original file line number Diff line number Diff line change
Expand Up @@ -238,24 +238,28 @@ def index_doc_batch_prepare(
else documents
)

# Create a record in the DB for every updateable document.
# for all updatable docs, upsert into the DB
# Does not include doc_updated_at which is also used to indicate a successful update
_upsert_documents_in_db(
documents=updatable_docs,
index_attempt_metadata=index_attempt_metadata,
db_session=db_session,
if updatable_docs:
_upsert_documents_in_db(
documents=updatable_docs,
index_attempt_metadata=index_attempt_metadata,
db_session=db_session,
)

logger.info(
f"Upserted {len(updatable_docs)} changed docs out of "
f"{len(documents)} total docs into the DB"
)

# Upsert the document to cc pair relationship for all documents
# for all docs, upsert the document to cc pair relationship
upsert_document_by_connector_credential_pair(
db_session,
index_attempt_metadata.connector_id,
index_attempt_metadata.credential_id,
document_ids,
)

logger.info(f"Upserted {len(updatable_docs)} documents into the DB")

# No docs to process because the batch is empty or every doc was already indexed
if not updatable_docs:
return None
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -48,6 +48,59 @@ def test_connector_creation(reset: None) -> None:
assert cc_pair_info.creator_email == admin_user.email


# TODO(rkuo): enable this test once I have credentials on GitHub
# def test_overlapping_connector_creation(reset: None) -> None:
# # Creating an admin user (first user created is automatically an admin)
# admin_user: DATestUser = UserManager.create(name="admin_user")

# config = {
# "wiki_base": os.environ["CONFLUENCE_TEST_SPACE_URL"],
# "space": os.environ["CONFLUENCE_TEST_SPACE"],
# "is_cloud": True,
# "page_id": "",
# }

# credential = {
# "confluence_username": os.environ["CONFLUENCE_USER_NAME"],
# "confluence_access_token": os.environ["CONFLUENCE_ACCESS_TOKEN"],
# }

# # record the time before we create the connector so that we know the
# # earliest moment at which indexing could have started
# now = datetime.now(timezone.utc)

# # create connector
# cc_pair_1 = CCPairManager.create_from_scratch(
# source=DocumentSource.CONFLUENCE,
# connector_specific_config=config,
# credential_json=credential,
# user_performing_action=admin_user,
# )

# CCPairManager.wait_for_indexing(
# cc_pair_1, now, timeout=60, user_performing_action=admin_user
# )

# cc_pair_2 = CCPairManager.create_from_scratch(
# source=DocumentSource.CONFLUENCE,
# connector_specific_config=config,
# credential_json=credential,
# user_performing_action=admin_user,
# )

# CCPairManager.wait_for_indexing(
# cc_pair_2, now, timeout=60, user_performing_action=admin_user
# )

# info_1 = CCPairManager.get_single(cc_pair_1.id)
# assert info_1

# info_2 = CCPairManager.get_single(cc_pair_2.id)
# assert info_2

# assert info_1.num_docs_indexed == info_2.num_docs_indexed


def test_connector_deletion(reset: None, vespa_client: vespa_fixture) -> None:
# Creating an admin user (first user created is automatically an admin)
admin_user: DATestUser = UserManager.create(name="admin_user")
Expand Down

0 comments on commit 2a492c3

Please sign in to comment.