From 0b1db1cdd2e7368b960a4e44e57502a9ce7b45c2 Mon Sep 17 00:00:00 2001
From: lombax85
Date: Sun, 8 Sep 2024 05:28:23 +0200
Subject: [PATCH] Google Drive connector - txt and markdown support (#1469)

---
 backend/danswer/connectors/google_drive/connector.py | 9 +++++++++
 1 file changed, 9 insertions(+)

diff --git a/backend/danswer/connectors/google_drive/connector.py b/backend/danswer/connectors/google_drive/connector.py
index 90e5b0ed5db..27c82f8f498 100644
--- a/backend/danswer/connectors/google_drive/connector.py
+++ b/backend/danswer/connectors/google_drive/connector.py
@@ -62,6 +62,8 @@ class GDriveMimeType(str, Enum):
     POWERPOINT = (
         "application/vnd.openxmlformats-officedocument.presentationml.presentation"
     )
+    PLAIN_TEXT = "text/plain"
+    MARKDOWN = "text/markdown"
 
 
 GoogleDriveFileType = dict[str, Any]
@@ -315,12 +317,19 @@ def extract_text(file: dict[str, str], service: discovery.Resource) -> str:
         GDriveMimeType.DOC.value,
         GDriveMimeType.PPT.value,
         GDriveMimeType.SPREADSHEET.value,
+        GDriveMimeType.PLAIN_TEXT.value,
+        GDriveMimeType.MARKDOWN.value,
     ]:
         export_mime_type = "text/plain"
         if mime_type == GDriveMimeType.SPREADSHEET.value:
             export_mime_type = "text/csv"
         elif mime_type == GDriveMimeType.PPT.value:
             export_mime_type = "text/plain"
+        elif mime_type in [
+            GDriveMimeType.PLAIN_TEXT.value,
+            GDriveMimeType.MARKDOWN.value
+        ]:
+            export_mime_type = mime_type
 
         response = (
             service.files()