From e348a3479e59b64bacb53c771ad9f6dc5119296b Mon Sep 17 00:00:00 2001 From: Jacob Pierce Date: Thu, 22 Aug 2024 14:16:31 -0700 Subject: [PATCH 01/12] add file storage option, validator --- kolibri/utils/options.py | 29 +++++++++++++++++++++++++++++ 1 file changed, 29 insertions(+) diff --git a/kolibri/utils/options.py b/kolibri/utils/options.py index 7f2ea3327c3..466376abff8 100644 --- a/kolibri/utils/options.py +++ b/kolibri/utils/options.py @@ -5,6 +5,7 @@ in the options.ini file. """ import ast +import importlib import logging import os import sys @@ -15,6 +16,7 @@ from configobj import ConfigObj from configobj import flatten_errors from configobj import get_extra_values +from django.core.files.storage import Storage from django.utils.functional import SimpleLazyObject from django.utils.module_loading import import_string from validate import is_boolean @@ -271,6 +273,22 @@ def multiprocess_bool(value): return False +def inherits_from_storage(value): + try: + modules = value.split(".") + klass = modules.pop() + module_path = ".".join(modules) + module = importlib.import_module(module_path) + Klass = getattr(module, klass) + return issubclass(Klass, Storage) + except ImportError: + logger.error("Default file storage is not available.") + raise VdtValueError(value) + except Exception: + logger.error("{} is not a valid Python module path".format(value)) + raise VdtValueError(value) + + def cache_option(value): """ Validate the cache options. @@ -385,6 +403,16 @@ def csp_source_list(value): base_option_spec = { + "FileStorage": { + "DEFAULT_FILE_STORAGE": { + "type": "file_storage_option", + "default": "django.core.files.storage.FileSystemStorage", + "description": """ + The storage backend class that Django will use when managing files. The class given here must implement + the Django files.storage.Storage class. + """, + } + }, "Cache": { "CACHE_BACKEND": { "type": "cache_option", @@ -780,6 +808,7 @@ def _get_validator(): "bytes": validate_bytes, "multiprocess_bool": multiprocess_bool, "cache_option": cache_option, + "file_storage_option": inherits_from_storage, "lazy_import_callback_list": lazy_import_callback_list, "csp_source_list": csp_source_list, } From ee61e610ae96dff6d9ec6bea55dc296abde3907f Mon Sep 17 00:00:00 2001 From: Jacob Pierce Date: Fri, 23 Aug 2024 12:57:53 -0700 Subject: [PATCH 02/12] WIP --- kolibri/deployment/default/settings/base.py | 10 ++++++++ kolibri/utils/options.py | 27 ++++----------------- 2 files changed, 15 insertions(+), 22 deletions(-) diff --git a/kolibri/deployment/default/settings/base.py b/kolibri/deployment/default/settings/base.py index 66c91932a09..a346e2d061a 100644 --- a/kolibri/deployment/default/settings/base.py +++ b/kolibri/deployment/default/settings/base.py @@ -191,6 +191,16 @@ DEFAULT_AUTO_FIELD = "django.db.models.AutoField" +# File Storage Backend +# https://docs.djangoproject.com/en/3.2/ref/files/storage/ + +if not os.environ.get("DEFAULT_FILE_STORAGE"): + if conf.OPTIONS["FileStorage"]["STORAGE_BACKEND"] == "file_system": + DEFAULT_FILE_STORAGE = "django.core.files.storage.FileSystemStorage" + elif conf.OPTIONS["FileStorage"]["STORAGE_BAKCEND"] == "gcloud": + DEFAULT_FILE_STORAGE = "some-other-thing-gotta-figure-that-out" + + # Internationalization # https://docs.djangoproject.com/en/3.2/topics/i18n/ diff --git a/kolibri/utils/options.py b/kolibri/utils/options.py index 466376abff8..a78594f25f8 100644 --- a/kolibri/utils/options.py +++ b/kolibri/utils/options.py @@ -5,7 +5,6 @@ in the options.ini file. """ import ast -import importlib import logging import os import sys @@ -273,22 +272,6 @@ def multiprocess_bool(value): return False -def inherits_from_storage(value): - try: - modules = value.split(".") - klass = modules.pop() - module_path = ".".join(modules) - module = importlib.import_module(module_path) - Klass = getattr(module, klass) - return issubclass(Klass, Storage) - except ImportError: - logger.error("Default file storage is not available.") - raise VdtValueError(value) - except Exception: - logger.error("{} is not a valid Python module path".format(value)) - raise VdtValueError(value) - - def cache_option(value): """ Validate the cache options. @@ -404,14 +387,15 @@ def csp_source_list(value): base_option_spec = { "FileStorage": { - "DEFAULT_FILE_STORAGE": { - "type": "file_storage_option", - "default": "django.core.files.storage.FileSystemStorage", + "STORAGE_BACKEND": { + "type": "option", + "options": ("file_system", "gcloud"), + "default": "file_system", "description": """ The storage backend class that Django will use when managing files. The class given here must implement the Django files.storage.Storage class. """, - } + }, }, "Cache": { "CACHE_BACKEND": { @@ -808,7 +792,6 @@ def _get_validator(): "bytes": validate_bytes, "multiprocess_bool": multiprocess_bool, "cache_option": cache_option, - "file_storage_option": inherits_from_storage, "lazy_import_callback_list": lazy_import_callback_list, "csp_source_list": csp_source_list, } From fced765b3025aa49d88710fe418bb899b099c9b9 Mon Sep 17 00:00:00 2001 From: Jacob Pierce Date: Fri, 23 Aug 2024 15:13:28 -0700 Subject: [PATCH 03/12] validate storage option; add reqs/storages.txt --- kolibri/deployment/default/settings/base.py | 7 ++++-- kolibri/utils/options.py | 24 ++++++++++++++++++--- requirements/storages.txt | 2 ++ 3 files changed, 28 insertions(+), 5 deletions(-) create mode 100644 requirements/storages.txt diff --git a/kolibri/deployment/default/settings/base.py b/kolibri/deployment/default/settings/base.py index a346e2d061a..27acbbae7b3 100644 --- a/kolibri/deployment/default/settings/base.py +++ b/kolibri/deployment/default/settings/base.py @@ -197,8 +197,11 @@ if not os.environ.get("DEFAULT_FILE_STORAGE"): if conf.OPTIONS["FileStorage"]["STORAGE_BACKEND"] == "file_system": DEFAULT_FILE_STORAGE = "django.core.files.storage.FileSystemStorage" - elif conf.OPTIONS["FileStorage"]["STORAGE_BAKCEND"] == "gcloud": - DEFAULT_FILE_STORAGE = "some-other-thing-gotta-figure-that-out" + elif conf.OPTIONS["FileStorage"]["STORAGE_BACKEND"] == "gcs": + # https://django-storages.readthedocs.io/en/latest/backends/gcloud.html#google-cloud-storage + GS_DEFAULT_ACL = "publicRead" + DEFAULT_FILE_STORAGE = "storages.backends.gcloud.GoogleCloudStorage" + # GS_PROJECT_ID, GS_CREDENTIALS, etc should be inferred from the environment # Internationalization diff --git a/kolibri/utils/options.py b/kolibri/utils/options.py index a78594f25f8..2674b500a2a 100644 --- a/kolibri/utils/options.py +++ b/kolibri/utils/options.py @@ -15,7 +15,6 @@ from configobj import ConfigObj from configobj import flatten_errors from configobj import get_extra_values -from django.core.files.storage import Storage from django.utils.functional import SimpleLazyObject from django.utils.module_loading import import_string from validate import is_boolean @@ -272,6 +271,24 @@ def multiprocess_bool(value): return False +def storage_option(value, *opts): + """ + Validate the storage options. + Check that the given option is valid, then check that needed external + libraries are available where relevant. + """ + value = is_option(value, *opts) + if value == "gcs": + try: + from storages.backends.gcloud import GoogleCloudStorage # noqa + except ModuleNotFoundError: + logger.error( + "Google Cloud Storage backend is not available.", + "Are storage requirements installed?", + ) + raise VdtValueError(value) + + def cache_option(value): """ Validate the cache options. @@ -388,8 +405,8 @@ def csp_source_list(value): base_option_spec = { "FileStorage": { "STORAGE_BACKEND": { - "type": "option", - "options": ("file_system", "gcloud"), + "type": "storage_option", + "options": ("file_system", "gcs"), "default": "file_system", "description": """ The storage backend class that Django will use when managing files. The class given here must implement @@ -791,6 +808,7 @@ def _get_validator(): "url_prefix": url_prefix, "bytes": validate_bytes, "multiprocess_bool": multiprocess_bool, + "storage_option": storage_option, "cache_option": cache_option, "lazy_import_callback_list": lazy_import_callback_list, "csp_source_list": csp_source_list, diff --git a/requirements/storages.txt b/requirements/storages.txt new file mode 100644 index 00000000000..a20ce2fe0c2 --- /dev/null +++ b/requirements/storages.txt @@ -0,0 +1,2 @@ +# Additional reqs for running kolibri with GCS file storage backend +django-storages[google]==1.14.2 From 250558f908482b4519c1fd48cb9411d9c3095026 Mon Sep 17 00:00:00 2001 From: Jacob Pierce Date: Mon, 28 Oct 2024 16:19:40 -0700 Subject: [PATCH 04/12] return the value from gcs storage_option validation fn --- kolibri/utils/options.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/kolibri/utils/options.py b/kolibri/utils/options.py index 2674b500a2a..b4eba78388d 100644 --- a/kolibri/utils/options.py +++ b/kolibri/utils/options.py @@ -281,6 +281,8 @@ def storage_option(value, *opts): if value == "gcs": try: from storages.backends.gcloud import GoogleCloudStorage # noqa + + return value except ModuleNotFoundError: logger.error( "Google Cloud Storage backend is not available.", From 7c44493d41adf30dac4732a0d1df48588c33acda Mon Sep 17 00:00:00 2001 From: Jacob Pierce Date: Wed, 30 Oct 2024 15:26:54 -0700 Subject: [PATCH 05/12] simplify imports, add filestorage util class to pass importpath to DEFAULT_FILE_STORAGE --- kolibri/deployment/default/settings/base.py | 13 +++++-------- kolibri/utils/file_storage.py | 19 +++++++++++++++++++ 2 files changed, 24 insertions(+), 8 deletions(-) create mode 100644 kolibri/utils/file_storage.py diff --git a/kolibri/deployment/default/settings/base.py b/kolibri/deployment/default/settings/base.py index 27acbbae7b3..927725be4fb 100644 --- a/kolibri/deployment/default/settings/base.py +++ b/kolibri/deployment/default/settings/base.py @@ -195,14 +195,11 @@ # https://docs.djangoproject.com/en/3.2/ref/files/storage/ if not os.environ.get("DEFAULT_FILE_STORAGE"): - if conf.OPTIONS["FileStorage"]["STORAGE_BACKEND"] == "file_system": - DEFAULT_FILE_STORAGE = "django.core.files.storage.FileSystemStorage" - elif conf.OPTIONS["FileStorage"]["STORAGE_BACKEND"] == "gcs": - # https://django-storages.readthedocs.io/en/latest/backends/gcloud.html#google-cloud-storage - GS_DEFAULT_ACL = "publicRead" - DEFAULT_FILE_STORAGE = "storages.backends.gcloud.GoogleCloudStorage" - # GS_PROJECT_ID, GS_CREDENTIALS, etc should be inferred from the environment - + if conf.OPTIONS["FileStorage"]["STORAGE_BACKEND"] == "gcs": + if DEBUG: + DEFAULT_FILE_STORAGE = "kolibri.utils.file_storage.KolibriFileStorageDebug" + else: + DEFAULT_FILE_STORAGE = "kolibri.utils.file_storage.KolibriFileStorage" # Internationalization # https://docs.djangoproject.com/en/3.2/topics/i18n/ diff --git a/kolibri/utils/file_storage.py b/kolibri/utils/file_storage.py new file mode 100644 index 00000000000..b55dad2b6be --- /dev/null +++ b/kolibri/utils/file_storage.py @@ -0,0 +1,19 @@ +# https://django-storages.readthedocs.io/en/latest/backends/gcloud.html#google-cloud-storage +from storages.backends.gcloud import GoogleCloudStorage # noqa + + +class KolibriFileStorage(GoogleCloudStorage): + # TODO Should this be like the device ID or something? + default_acl = "publicRead" + bucket_name = "kolibri" + + +class KolibriFileStorageDebug(KolibriFileStorage): + """ + See: https://github.com/fullstorydev/emulators?tab=readme-ov-file#google-cloud-storage-emulator + Once installed, run `gscemulator -port 7070` + This field being set will allow our GoogleCloudStorage library to interface with the emulator + """ + + custom_endpoint = "http://localhost:7070" + bucket_name = "kolibri_debug_bucket" From b792ce9fc57a2309e871f873c9015e1f8df15edb Mon Sep 17 00:00:00 2001 From: Jacob Pierce Date: Wed, 30 Oct 2024 15:29:11 -0700 Subject: [PATCH 06/12] exportusers & csv_utils updated to use DefaultStorage --- kolibri/core/auth/csv_utils.py | 64 ++++++++++--------- .../auth/management/commands/exportusers.py | 5 +- 2 files changed, 36 insertions(+), 33 deletions(-) diff --git a/kolibri/core/auth/csv_utils.py b/kolibri/core/auth/csv_utils.py index 3a1dc8c8edf..66d839162b8 100644 --- a/kolibri/core/auth/csv_utils.py +++ b/kolibri/core/auth/csv_utils.py @@ -1,9 +1,10 @@ import csv +import io import logging -import os from collections import OrderedDict from functools import partial +from django.core.files.storage import DefaultStorage from django.db.models import OuterRef from django.db.models import Q @@ -14,9 +15,10 @@ from kolibri.core.auth.models import Facility from kolibri.core.auth.models import FacilityUser from kolibri.core.query import SQCount -from kolibri.core.utils.csv import open_csv_for_writing from kolibri.core.utils.csv import output_mapper +# from kolibri.core.utils.csv import open_csv_for_writing + logger = logging.getLogger(__name__) @@ -159,9 +161,11 @@ def map_input(obj): ) -def csv_file_generator(facility, filepath, overwrite=True, demographic=False): - if not overwrite and os.path.exists(filepath): - raise ValueError("{} already exists".format(filepath)) +def csv_file_generator(facility, filename, overwrite=True, demographic=False): + fs = DefaultStorage() + + # if not overwrite and os.path.exists(filename): + # raise ValueError("{} already exists".format(filepath)) queryset = FacilityUser.objects.filter(facility=facility) header_labels = tuple( @@ -174,7 +178,7 @@ def csv_file_generator(facility, filepath, overwrite=True, demographic=False): column for column in db_columns if demographic or column not in DEMO_FIELDS ) - csv_file = open_csv_for_writing(filepath) + csv_file = io.StringIO() # open_csv_for_writing(filepath) mappings = {} @@ -184,27 +188,29 @@ def csv_file_generator(facility, filepath, overwrite=True, demographic=False): map_output = partial(output_mapper, labels=labels, output_mappings=mappings) - with csv_file as f: - writer = csv.DictWriter(f, header_labels) - logger.info("Creating csv file {filename}".format(filename=filepath)) - writer.writeheader() - usernames = set() - for item in ( - queryset.select_related("facility") - .annotate( - classroom_count=SQCount( - Classroom.objects.filter(membership__user=OuterRef("id")), - field="id", - ) - ) - .prefetch_related("memberships__collection") - .filter( - Q(memberships__collection__kind=CLASSROOM) - | Q(memberships__collection__isnull=True) + writer = csv.DictWriter(csv_file, header_labels) + logger.info("Creating csv file {filename}".format(filename=filename)) + writer.writeheader() + usernames = set() + for item in ( + queryset.select_related("facility") + .annotate( + classroom_count=SQCount( + Classroom.objects.filter(membership__user=OuterRef("id")), + field="id", ) - .values(*columns) - ): - if item["username"] not in usernames: - writer.writerow(map_output(item)) - usernames.add(item["username"]) - yield + ) + .prefetch_related("memberships__collection") + .filter( + Q(memberships__collection__kind=CLASSROOM) + | Q(memberships__collection__isnull=True) + ) + .values(*columns) + ): + if item["username"] not in usernames: + writer.writerow(map_output(item)) + usernames.add(item["username"]) + yield + csv_file.seek(0) + file = fs.save(filename, csv_file) + logger.info("File saved - Path: {} URL: {}".format(fs.path(file), fs.url(file))) diff --git a/kolibri/core/auth/management/commands/exportusers.py b/kolibri/core/auth/management/commands/exportusers.py index f65d065b084..57c3228080d 100644 --- a/kolibri/core/auth/management/commands/exportusers.py +++ b/kolibri/core/auth/management/commands/exportusers.py @@ -1,5 +1,4 @@ import logging -import os import sys from django.core.management.base import CommandError @@ -57,15 +56,13 @@ def handle_async(self, *args, **options): else: filename = options["output_file"] - filepath = os.path.join(os.getcwd(), filename) - total_rows = FacilityUser.objects.filter(facility=facility).count() with self.start_progress(total=total_rows) as progress_update: try: for row in csv_file_generator( facility, - filepath, + filename, overwrite=options["overwrite"], demographic=options["demographic"], ): From 237d65713b94544c6bf66876b9dcf456fb2aba9b Mon Sep 17 00:00:00 2001 From: Jacob Pierce Date: Fri, 20 Dec 2024 14:59:44 -0800 Subject: [PATCH 07/12] simplify gcs settings and calsses --- kolibri/deployment/default/settings/base.py | 6 ++---- kolibri/utils/file_storage.py | 18 ++++-------------- 2 files changed, 6 insertions(+), 18 deletions(-) diff --git a/kolibri/deployment/default/settings/base.py b/kolibri/deployment/default/settings/base.py index 927725be4fb..1d7bef34888 100644 --- a/kolibri/deployment/default/settings/base.py +++ b/kolibri/deployment/default/settings/base.py @@ -196,10 +196,8 @@ if not os.environ.get("DEFAULT_FILE_STORAGE"): if conf.OPTIONS["FileStorage"]["STORAGE_BACKEND"] == "gcs": - if DEBUG: - DEFAULT_FILE_STORAGE = "kolibri.utils.file_storage.KolibriFileStorageDebug" - else: - DEFAULT_FILE_STORAGE = "kolibri.utils.file_storage.KolibriFileStorage" + DEFAULT_FILE_STORAGE = "kolibri.utils.file_storage.KolibriFileStorage" + BUCKET_NAME = os.getenv("GCS_BUCKET_NAME") or "kdp-csv-reporting-develop" # Internationalization # https://docs.djangoproject.com/en/3.2/topics/i18n/ diff --git a/kolibri/utils/file_storage.py b/kolibri/utils/file_storage.py index b55dad2b6be..443c8107f87 100644 --- a/kolibri/utils/file_storage.py +++ b/kolibri/utils/file_storage.py @@ -1,19 +1,9 @@ -# https://django-storages.readthedocs.io/en/latest/backends/gcloud.html#google-cloud-storage +from django.conf import settings from storages.backends.gcloud import GoogleCloudStorage # noqa +# https://django-storages.readthedocs.io/en/latest/backends/gcloud.html#google-cloud-storage + class KolibriFileStorage(GoogleCloudStorage): - # TODO Should this be like the device ID or something? default_acl = "publicRead" - bucket_name = "kolibri" - - -class KolibriFileStorageDebug(KolibriFileStorage): - """ - See: https://github.com/fullstorydev/emulators?tab=readme-ov-file#google-cloud-storage-emulator - Once installed, run `gscemulator -port 7070` - This field being set will allow our GoogleCloudStorage library to interface with the emulator - """ - - custom_endpoint = "http://localhost:7070" - bucket_name = "kolibri_debug_bucket" + bucket_name = settings.BUCKET_NAME From 6be0f401acd934fe09ef49481510eb08cef65ee6 Mon Sep 17 00:00:00 2001 From: Jacob Pierce Date: Fri, 20 Dec 2024 15:00:47 -0800 Subject: [PATCH 08/12] update facility views to use DefaultStorage in csv file response --- kolibri/plugins/facility/views.py | 27 ++++++++++++--------------- 1 file changed, 12 insertions(+), 15 deletions(-) diff --git a/kolibri/plugins/facility/views.py b/kolibri/plugins/facility/views.py index 8960ec75284..fb8a6a1adf2 100644 --- a/kolibri/plugins/facility/views.py +++ b/kolibri/plugins/facility/views.py @@ -1,9 +1,9 @@ -import io import json import os from datetime import datetime as dt from django.core.exceptions import PermissionDenied +from django.core.files.storage import DefaultStorage from django.http import Http404 from django.http import HttpResponse from django.http.response import FileResponse @@ -187,34 +187,31 @@ def download_csv_file(request, csv_type, facility_id): ).replace("-", "_"), } + file_storage = DefaultStorage() + if csv_type in CSV_EXPORT_FILENAMES.keys(): if csv_type == "user": - filepath = os.path.join( - conf.KOLIBRI_HOME, - "log_export", - CSV_EXPORT_FILENAMES[csv_type].format(facility.name, facility.id[:4]), + filename = CSV_EXPORT_FILENAMES[csv_type].format( + facility.name, facility.id[:4] ) else: log_request = _get_log_request(csv_type, facility_id) if log_request: start = log_request.selected_start_date.isoformat() end = log_request.selected_end_date.isoformat() - filepath = os.path.join( - conf.KOLIBRI_HOME, - "log_export", - CSV_EXPORT_FILENAMES[csv_type].format( - facility.name, facility.id[:4], start[:10], end[:10] - ), + + filename = CSV_EXPORT_FILENAMES[csv_type].format( + facility.name, facility.id[:4], start[:10], end[:10] ) else: - filepath = None + filename = None # if the file does not exist on disk, return a 404 - if filepath is None or not os.path.exists(filepath): + if not file_storage.exists(filename): raise Http404("There is no csv export file for {} available".format(csv_type)) # generate a file response - response = FileResponse(io.open(filepath, "rb")) + response = FileResponse(file_storage.open(filename, "rb")) # set the content-type by guessing from the filename response.headers["Content-Type"] = "text/csv" @@ -234,6 +231,6 @@ def download_csv_file(request, csv_type, facility_id): translation.deactivate() # set the content-length to the file size - response.headers["Content-Length"] = os.path.getsize(filepath) + response.headers["Content-Length"] = file_storage.size(filename) return response From f5d0d660e5c1570fc85ec5000b9f319600c65d33 Mon Sep 17 00:00:00 2001 From: Jacob Pierce Date: Fri, 20 Dec 2024 15:02:23 -0800 Subject: [PATCH 09/12] logs csv_export use DefaultStorage --- kolibri/core/logger/csv_export.py | 30 +++++++++++++++++++++++------- 1 file changed, 23 insertions(+), 7 deletions(-) diff --git a/kolibri/core/logger/csv_export.py b/kolibri/core/logger/csv_export.py index 6ef35cc24f7..90eadf89ad7 100644 --- a/kolibri/core/logger/csv_export.py +++ b/kolibri/core/logger/csv_export.py @@ -1,8 +1,8 @@ import csv import datetime +import io import logging import math -import os from collections import OrderedDict from dateutil import parser @@ -11,6 +11,7 @@ from django.db.models import Max from django.db.models import OuterRef from django.db.models import Subquery +from django.core.files.storage import DefaultStorage from django.utils.translation import gettext_lazy as _ from django.utils.translation import pgettext_lazy from le_utils.constants import content_kinds @@ -19,7 +20,6 @@ from .models import ContentSummaryLog from kolibri.core.content.models import ChannelMetadata from kolibri.core.content.models import ContentNode -from kolibri.core.utils.csv import open_csv_for_writing from kolibri.core.utils.csv import output_mapper @@ -242,6 +242,7 @@ def map_object(item, topic_headers_length): def csv_file_generator( facility, log_type, filepath, start_date, end_date, overwrite=False ): + file_storage = DefaultStorage() if log_type not in ("summary", "session"): raise ValueError( @@ -256,8 +257,9 @@ def csv_file_generator( else parser.parse(end_date) + datetime.timedelta(days=1) ) - if not overwrite and os.path.exists(filepath): - raise ValueError("{} already exists".format(filepath)) + filename = file_storage.generate_filename(filepath.split("/")[-1]) + if not overwrite and file_storage.exists(filename): + raise ValueError("{} already exists".format(filename)) queryset = log_info["queryset"].filter( dataset_id=facility.dataset_id, ) @@ -285,14 +287,28 @@ def csv_file_generator( label for _, label in topic_headers ] - csv_file = open_csv_for_writing(filepath) + csv_file = io.BytesIO() with csv_file as f: - writer = csv.DictWriter(f, header_labels) - logger.info("Creating csv file {filename}".format(filename=filepath)) + writer = csv.DictWriter(io.TextIOWrapper(f, encoding="utf-8"), header_labels) + logger.info( + "Creating {logtype} csv file {filename}".format( + logtype=log_type, filename=filename + ) + ) writer.writeheader() for item in queryset.select_related("user", "user__facility").values( *log_info["db_columns"] ): writer.writerow(map_object(item, len(topic_headers))) yield + + f.seek(0) + file = file_storage.save(filename, f) + + try: + # If the file is local, we can get the path + logger.info("File saved - Path: {}".format(file_storage.path(file))) + except NotImplementedError: + # But if path is not implemented, we assume we can get the URL + logger.info("File saved - Path: {}".format(file_storage.url(file))) From e052bb433206a7888eb75d023576f7864a9004bf Mon Sep 17 00:00:00 2001 From: Jacob Pierce Date: Fri, 20 Dec 2024 15:03:08 -0800 Subject: [PATCH 10/12] WIP - Users CSV export use DefaultStorage --- .../management/commands/bulkexportusers.py | 27 ++++++++++++++----- 1 file changed, 21 insertions(+), 6 deletions(-) diff --git a/kolibri/core/auth/management/commands/bulkexportusers.py b/kolibri/core/auth/management/commands/bulkexportusers.py index 84735e3407e..08b7a5e41e4 100644 --- a/kolibri/core/auth/management/commands/bulkexportusers.py +++ b/kolibri/core/auth/management/commands/bulkexportusers.py @@ -1,4 +1,5 @@ import csv +import io import logging import ntpath import os @@ -6,6 +7,7 @@ from functools import partial from django.conf import settings +from django.core.files.storage import DefaultStorage from django.core.management.base import CommandError from django.db.models import OuterRef from django.db.models import Subquery @@ -26,7 +28,6 @@ from kolibri.core.query import GroupConcatSubquery from kolibri.core.tasks.management.commands.base import AsyncCommand from kolibri.core.tasks.utils import get_current_job -from kolibri.core.utils.csv import open_csv_for_writing from kolibri.core.utils.csv import output_mapper from kolibri.utils import conf @@ -153,17 +154,20 @@ def translate_labels(): def csv_file_generator(facility, filepath, overwrite=True): - if not overwrite and os.path.exists(filepath): - raise ValueError("{} already exists".format(filepath)) + file_storage = DefaultStorage() + filename = file_storage.generate_filename(filepath.split("/")[-1]) + + if not overwrite and file_storage.exists(filename): + raise ValueError("{} already exists".format(filename)) queryset = FacilityUser.objects.filter(facility=facility) header_labels = translate_labels().values() - csv_file = open_csv_for_writing(filepath) + csv_file = io.BytesIO() with csv_file as f: - writer = csv.DictWriter(f, header_labels) - logger.info("Creating csv file {filename}".format(filename=filepath)) + writer = csv.DictWriter(io.TextIOWrapper(f, encoding="utf-8"), header_labels) + logger.info("Creating users csv file {filename}".format(filename=filepath)) writer.writeheader() usernames = set() @@ -203,6 +207,17 @@ def csv_file_generator(facility, filepath, overwrite=True): usernames.add(item["username"]) yield item + f.seek(0) + file = file_storage.save(filename, f) + + try: + # If the file is local, we can get the path + logger.info("File saved - Path: {}".format(file_storage.path(file))) + except NotImplementedError: + # But if path is not implemented, we assume we can get the URL + logger.info("File saved - Path: {}".format(file_storage.url(file))) + logger.info("File saved - Size: {}".format(file_storage.size(file))) + class Command(AsyncCommand): def add_arguments(self, parser): From 953b29b4eda3c1d0a192d92935516d1a135f6d3b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jos=C3=A9=20L=2E=20Redrejo=20Rodr=C3=ADguez?= Date: Thu, 26 Dec 2024 20:48:17 +0100 Subject: [PATCH 11/12] flushing to ensure all data from the TextIOWrapper is written to the underlying BytesIO buffer --- kolibri/core/auth/management/commands/bulkexportusers.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/kolibri/core/auth/management/commands/bulkexportusers.py b/kolibri/core/auth/management/commands/bulkexportusers.py index 08b7a5e41e4..97ad1004031 100644 --- a/kolibri/core/auth/management/commands/bulkexportusers.py +++ b/kolibri/core/auth/management/commands/bulkexportusers.py @@ -166,7 +166,8 @@ def csv_file_generator(facility, filepath, overwrite=True): csv_file = io.BytesIO() with csv_file as f: - writer = csv.DictWriter(io.TextIOWrapper(f, encoding="utf-8"), header_labels) + buffer = io.TextIOWrapper(f, encoding="utf-8") + writer = csv.DictWriter(buffer, header_labels) logger.info("Creating users csv file {filename}".format(filename=filepath)) writer.writeheader() usernames = set() @@ -207,6 +208,7 @@ def csv_file_generator(facility, filepath, overwrite=True): usernames.add(item["username"]) yield item + buffer.flush() # Ensure all data is written to the underlying BytesIO f.seek(0) file = file_storage.save(filename, f) From 38368cbc7c85cbb03cc194561c27ef21fac2ece8 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci-lite[bot]" <117423508+pre-commit-ci-lite[bot]@users.noreply.github.com> Date: Thu, 26 Dec 2024 19:52:44 +0000 Subject: [PATCH 12/12] [pre-commit.ci lite] apply automatic fixes --- kolibri/core/logger/csv_export.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kolibri/core/logger/csv_export.py b/kolibri/core/logger/csv_export.py index 90eadf89ad7..919d0d63867 100644 --- a/kolibri/core/logger/csv_export.py +++ b/kolibri/core/logger/csv_export.py @@ -7,11 +7,11 @@ from dateutil import parser from django.core.cache import cache +from django.core.files.storage import DefaultStorage from django.db.models import F from django.db.models import Max from django.db.models import OuterRef from django.db.models import Subquery -from django.core.files.storage import DefaultStorage from django.utils.translation import gettext_lazy as _ from django.utils.translation import pgettext_lazy from le_utils.constants import content_kinds