diff --git a/backend/danswer/connectors/google_drive/connector.py b/backend/danswer/connectors/google_drive/connector.py index 1a4f91d1726..80674b5a37d 100644 --- a/backend/danswer/connectors/google_drive/connector.py +++ b/backend/danswer/connectors/google_drive/connector.py @@ -317,27 +317,23 @@ def extract_text(file: dict[str, str], service: discovery.Resource) -> str: GDriveMimeType.DOC.value, GDriveMimeType.PPT.value, GDriveMimeType.SPREADSHEET.value, - GDriveMimeType.PLAIN_TEXT.value, - GDriveMimeType.MARKDOWN.value, ]: - export_mime_type = "text/plain" - if mime_type == GDriveMimeType.SPREADSHEET.value: - export_mime_type = "text/csv" - elif mime_type == GDriveMimeType.PPT.value: - export_mime_type = "text/plain" - elif mime_type in [ - GDriveMimeType.PLAIN_TEXT.value, - GDriveMimeType.MARKDOWN.value, - ]: - export_mime_type = mime_type - - response = ( + export_mime_type = ( + "text/plain" + if mime_type != GDriveMimeType.SPREADSHEET.value + else "text/csv" + ) + return ( service.files() .export(fileId=file["id"], mimeType=export_mime_type) .execute() + .decode("utf-8") ) - return response.decode("utf-8") - + elif mime_type in [ + GDriveMimeType.PLAIN_TEXT.value, + GDriveMimeType.MARKDOWN.value, + ]: + return service.files().get_media(fileId=file["id"]).execute().decode("utf-8") elif mime_type == GDriveMimeType.WORD_DOC.value: response = service.files().get_media(fileId=file["id"]).execute() return docx_to_text(file=io.BytesIO(response))