From 28ae9d49f413910016352172734d726333b78ae3 Mon Sep 17 00:00:00 2001 From: balomueller Date: Mon, 9 Sep 2024 12:15:17 +0200 Subject: [PATCH 1/4] Changes for smueller dev env --- compose-up.sh | 1 + deployment/docker_compose/docker-compose.dev.yml | 2 +- 2 files changed, 2 insertions(+), 1 deletion(-) create mode 100644 compose-up.sh diff --git a/compose-up.sh b/compose-up.sh new file mode 100644 index 00000000000..70c06893a3b --- /dev/null +++ b/compose-up.sh @@ -0,0 +1 @@ +docker compose -f docker-compose.dev.yml -p danswer-stack up -d \ No newline at end of file diff --git a/deployment/docker_compose/docker-compose.dev.yml b/deployment/docker_compose/docker-compose.dev.yml index 7c570219e81..d249b23cfdc 100644 --- a/deployment/docker_compose/docker-compose.dev.yml +++ b/deployment/docker_compose/docker-compose.dev.yml @@ -319,7 +319,7 @@ services: - DOMAIN=localhost ports: - "80:80" - - "3000:80" # allow for localhost:3000 usage, since that is the norm + - "3333:80" # allow for localhost:3000 usage, since that is the norm volumes: - ../data/nginx:/etc/nginx/conf.d logging: From 6c84b9efc39c6c95469a34549f24c7516e462f26 Mon Sep 17 00:00:00 2001 From: balomueller Date: Mon, 9 Sep 2024 16:18:54 +0200 Subject: [PATCH 2/4] Support actual loading of PLAIN_TEXT and MARKDOWN --- .../connectors/google_drive/connector.py | 25 +++++++++---------- 1 file changed, 12 insertions(+), 13 deletions(-) diff --git a/backend/danswer/connectors/google_drive/connector.py b/backend/danswer/connectors/google_drive/connector.py index 1a4f91d1726..3810cba6af5 100644 --- a/backend/danswer/connectors/google_drive/connector.py +++ b/backend/danswer/connectors/google_drive/connector.py @@ -321,23 +321,22 @@ def extract_text(file: dict[str, str], service: discovery.Resource) -> str: GDriveMimeType.MARKDOWN.value, ]: export_mime_type = "text/plain" - if mime_type == GDriveMimeType.SPREADSHEET.value: - export_mime_type = "text/csv" - elif mime_type == GDriveMimeType.PPT.value: - export_mime_type = "text/plain" + if mime_type in [ + GDriveMimeType.SPREADSHEET.value, + GDriveMimeType.PPT.value, + ]: + export_mime_type = "text/csv" if mime_type == GDriveMimeType.SPREADSHEET.value else "text/plain" + return ( + service.files() + .export(fileId=file["id"], mimeType=export_mime_type) + .execute() + .decode("utf-8") + ) elif mime_type in [ GDriveMimeType.PLAIN_TEXT.value, GDriveMimeType.MARKDOWN.value, ]: - export_mime_type = mime_type - - response = ( - service.files() - .export(fileId=file["id"], mimeType=export_mime_type) - .execute() - ) - return response.decode("utf-8") - + return service.files().get_media(fileId=file["id"]).execute() elif mime_type == GDriveMimeType.WORD_DOC.value: response = service.files().get_media(fileId=file["id"]).execute() return docx_to_text(file=io.BytesIO(response)) From cfa7720c5754afbc8d9c11a134271930ddef0f29 Mon Sep 17 00:00:00 2001 From: balomueller Date: Mon, 9 Sep 2024 16:22:08 +0200 Subject: [PATCH 3/4] Revert "Changes for smueller dev env" This reverts commit 28ae9d49f413910016352172734d726333b78ae3. --- compose-up.sh | 1 - deployment/docker_compose/docker-compose.dev.yml | 2 +- 2 files changed, 1 insertion(+), 2 deletions(-) delete mode 100644 compose-up.sh diff --git a/compose-up.sh b/compose-up.sh deleted file mode 100644 index 70c06893a3b..00000000000 --- a/compose-up.sh +++ /dev/null @@ -1 +0,0 @@ -docker compose -f docker-compose.dev.yml -p danswer-stack up -d \ No newline at end of file diff --git a/deployment/docker_compose/docker-compose.dev.yml b/deployment/docker_compose/docker-compose.dev.yml index d249b23cfdc..7c570219e81 100644 --- a/deployment/docker_compose/docker-compose.dev.yml +++ b/deployment/docker_compose/docker-compose.dev.yml @@ -319,7 +319,7 @@ services: - DOMAIN=localhost ports: - "80:80" - - "3333:80" # allow for localhost:3000 usage, since that is the norm + - "3000:80" # allow for localhost:3000 usage, since that is the norm volumes: - ../data/nginx:/etc/nginx/conf.d logging: From d031b04e663f647e7244225e7ea4f47bbdad379d Mon Sep 17 00:00:00 2001 From: Yuhong Sun Date: Mon, 9 Sep 2024 15:36:19 -0700 Subject: [PATCH 4/4] k --- .../connectors/google_drive/connector.py | 31 +++++++++---------- 1 file changed, 14 insertions(+), 17 deletions(-) diff --git a/backend/danswer/connectors/google_drive/connector.py b/backend/danswer/connectors/google_drive/connector.py index 3810cba6af5..80674b5a37d 100644 --- a/backend/danswer/connectors/google_drive/connector.py +++ b/backend/danswer/connectors/google_drive/connector.py @@ -317,26 +317,23 @@ def extract_text(file: dict[str, str], service: discovery.Resource) -> str: GDriveMimeType.DOC.value, GDriveMimeType.PPT.value, GDriveMimeType.SPREADSHEET.value, + ]: + export_mime_type = ( + "text/plain" + if mime_type != GDriveMimeType.SPREADSHEET.value + else "text/csv" + ) + return ( + service.files() + .export(fileId=file["id"], mimeType=export_mime_type) + .execute() + .decode("utf-8") + ) + elif mime_type in [ GDriveMimeType.PLAIN_TEXT.value, GDriveMimeType.MARKDOWN.value, ]: - export_mime_type = "text/plain" - if mime_type in [ - GDriveMimeType.SPREADSHEET.value, - GDriveMimeType.PPT.value, - ]: - export_mime_type = "text/csv" if mime_type == GDriveMimeType.SPREADSHEET.value else "text/plain" - return ( - service.files() - .export(fileId=file["id"], mimeType=export_mime_type) - .execute() - .decode("utf-8") - ) - elif mime_type in [ - GDriveMimeType.PLAIN_TEXT.value, - GDriveMimeType.MARKDOWN.value, - ]: - return service.files().get_media(fileId=file["id"]).execute() + return service.files().get_media(fileId=file["id"]).execute().decode("utf-8") elif mime_type == GDriveMimeType.WORD_DOC.value: response = service.files().get_media(fileId=file["id"]).execute() return docx_to_text(file=io.BytesIO(response))