Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add file storage option #12590

Draft
wants to merge 10 commits into
base: release-v0.17.x
Choose a base branch
from
64 changes: 35 additions & 29 deletions kolibri/core/auth/csv_utils.py
Original file line number Diff line number Diff line change
@@ -1,11 +1,12 @@
from __future__ import unicode_literals

import csv
import io
import logging
import os
from collections import OrderedDict
from functools import partial

from django.core.files.storage import DefaultStorage
from django.db.models import OuterRef
from django.db.models import Q

Expand All @@ -16,9 +17,10 @@
from kolibri.core.auth.models import Facility
from kolibri.core.auth.models import FacilityUser
from kolibri.core.query import SQCount
from kolibri.core.utils.csv import open_csv_for_writing
from kolibri.core.utils.csv import output_mapper

# from kolibri.core.utils.csv import open_csv_for_writing


logger = logging.getLogger(__name__)

Expand Down Expand Up @@ -161,9 +163,11 @@ def map_input(obj):
)


def csv_file_generator(facility, filepath, overwrite=True, demographic=False):
if not overwrite and os.path.exists(filepath):
raise ValueError("{} already exists".format(filepath))
def csv_file_generator(facility, filename, overwrite=True, demographic=False):
fs = DefaultStorage()

# if not overwrite and os.path.exists(filename):
# raise ValueError("{} already exists".format(filepath))
queryset = FacilityUser.objects.filter(facility=facility)

header_labels = tuple(
Expand All @@ -176,7 +180,7 @@ def csv_file_generator(facility, filepath, overwrite=True, demographic=False):
column for column in db_columns if demographic or column not in DEMO_FIELDS
)

csv_file = open_csv_for_writing(filepath)
csv_file = io.StringIO() # open_csv_for_writing(filepath)

mappings = {}

Expand All @@ -186,27 +190,29 @@ def csv_file_generator(facility, filepath, overwrite=True, demographic=False):

map_output = partial(output_mapper, labels=labels, output_mappings=mappings)

with csv_file as f:
writer = csv.DictWriter(f, header_labels)
logger.info("Creating csv file {filename}".format(filename=filepath))
writer.writeheader()
usernames = set()
for item in (
queryset.select_related("facility")
.annotate(
classroom_count=SQCount(
Classroom.objects.filter(membership__user=OuterRef("id")),
field="id",
)
)
.prefetch_related("memberships__collection")
.filter(
Q(memberships__collection__kind=CLASSROOM)
| Q(memberships__collection__isnull=True)
writer = csv.DictWriter(csv_file, header_labels)
logger.info("Creating csv file {filename}".format(filename=filename))
writer.writeheader()
usernames = set()
for item in (
queryset.select_related("facility")
.annotate(
classroom_count=SQCount(
Classroom.objects.filter(membership__user=OuterRef("id")),
field="id",
)
.values(*columns)
):
if item["username"] not in usernames:
writer.writerow(map_output(item))
usernames.add(item["username"])
yield
)
.prefetch_related("memberships__collection")
.filter(
Q(memberships__collection__kind=CLASSROOM)
| Q(memberships__collection__isnull=True)
)
.values(*columns)
):
if item["username"] not in usernames:
writer.writerow(map_output(item))
usernames.add(item["username"])
yield
csv_file.seek(0)
file = fs.save(filename, csv_file)
logger.info("File saved - Path: {} URL: {}".format(fs.path(file), fs.url(file)))
27 changes: 21 additions & 6 deletions kolibri/core/auth/management/commands/bulkexportusers.py
Original file line number Diff line number Diff line change
@@ -1,11 +1,13 @@
import csv
import io
import logging
import ntpath
import os
from collections import OrderedDict
from functools import partial

from django.conf import settings
from django.core.files.storage import DefaultStorage
from django.core.management.base import CommandError
from django.db.models import OuterRef
from django.db.models import Subquery
Expand All @@ -26,7 +28,6 @@
from kolibri.core.query import GroupConcatSubquery
from kolibri.core.tasks.management.commands.base import AsyncCommand
from kolibri.core.tasks.utils import get_current_job
from kolibri.core.utils.csv import open_csv_for_writing
from kolibri.core.utils.csv import output_mapper
from kolibri.utils import conf

Expand Down Expand Up @@ -153,17 +154,20 @@ def translate_labels():


def csv_file_generator(facility, filepath, overwrite=True):
if not overwrite and os.path.exists(filepath):
raise ValueError("{} already exists".format(filepath))
file_storage = DefaultStorage()
filename = file_storage.generate_filename(filepath.split("/")[-1])

if not overwrite and file_storage.exists(filename):
raise ValueError("{} already exists".format(filename))
queryset = FacilityUser.objects.filter(facility=facility)

header_labels = translate_labels().values()

csv_file = open_csv_for_writing(filepath)
csv_file = io.BytesIO()

with csv_file as f:
writer = csv.DictWriter(f, header_labels)
logger.info("Creating csv file {filename}".format(filename=filepath))
writer = csv.DictWriter(io.TextIOWrapper(f, encoding="utf-8"), header_labels)
logger.info("Creating users csv file {filename}".format(filename=filepath))
writer.writeheader()
usernames = set()

Expand Down Expand Up @@ -203,6 +207,17 @@ def csv_file_generator(facility, filepath, overwrite=True):
usernames.add(item["username"])
yield item

f.seek(0)
file = file_storage.save(filename, f)

try:
# If the file is local, we can get the path
logger.info("File saved - Path: {}".format(file_storage.path(file)))
except NotImplementedError:
# But if path is not implemented, we assume we can get the URL
logger.info("File saved - Path: {}".format(file_storage.url(file)))
logger.info("File saved - Size: {}".format(file_storage.size(file)))
Comment on lines +218 to +219
Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@jredrejo I've tried several things around here, but no matter what I do, the file always shows size 0... I've confirmed that there are users (usernames is full of data, for example) -- so I'm not sure why the writer isn't updating the file object here...



class Command(AsyncCommand):
def add_arguments(self, parser):
Expand Down
5 changes: 1 addition & 4 deletions kolibri/core/auth/management/commands/exportusers.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,4 @@
import logging
import os
import sys

from django.core.management.base import CommandError
Expand Down Expand Up @@ -57,15 +56,13 @@ def handle_async(self, *args, **options):
else:
filename = options["output_file"]

filepath = os.path.join(os.getcwd(), filename)

total_rows = FacilityUser.objects.filter(facility=facility).count()

with self.start_progress(total=total_rows) as progress_update:
try:
for row in csv_file_generator(
facility,
filepath,
filename,
overwrite=options["overwrite"],
demographic=options["demographic"],
):
Expand Down
30 changes: 23 additions & 7 deletions kolibri/core/logger/csv_export.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,14 +2,15 @@

import csv
import datetime
import io
import logging
import math
import os
from collections import OrderedDict
from functools import partial

from dateutil import parser
from django.core.cache import cache
from django.core.files.storage import DefaultStorage
from django.utils.translation import gettext_lazy as _
from django.utils.translation import pgettext_lazy
from le_utils.constants import content_kinds
Expand All @@ -18,7 +19,6 @@
from .models import ContentSummaryLog
from kolibri.core.content.models import ChannelMetadata
from kolibri.core.content.models import ContentNode
from kolibri.core.utils.csv import open_csv_for_writing
from kolibri.core.utils.csv import output_mapper


Expand Down Expand Up @@ -146,6 +146,7 @@ def cache_content_title(obj):
def csv_file_generator(
facility, log_type, filepath, start_date, end_date, overwrite=False
):
file_storage = DefaultStorage()

if log_type not in ("summary", "session"):
raise ValueError(
Expand All @@ -160,8 +161,9 @@ def csv_file_generator(
else parser.parse(end_date) + datetime.timedelta(days=1)
)

if not overwrite and os.path.exists(filepath):
raise ValueError("{} already exists".format(filepath))
filename = file_storage.generate_filename(filepath.split("/")[-1])
if not overwrite and file_storage.exists(filename):
raise ValueError("{} already exists".format(filename))
queryset = log_info["queryset"].filter(
dataset_id=facility.dataset_id,
)
Expand All @@ -179,14 +181,28 @@ def csv_file_generator(
if log_type == "summary" or label != labels["completion_timestamp"]
)

csv_file = open_csv_for_writing(filepath)
csv_file = io.BytesIO()

with csv_file as f:
writer = csv.DictWriter(f, header_labels)
logger.info("Creating csv file {filename}".format(filename=filepath))
writer = csv.DictWriter(io.TextIOWrapper(f, encoding="utf-8"), header_labels)
logger.info(
"Creating {logtype} csv file {filename}".format(
logtype=log_type, filename=filename
)
)
writer.writeheader()
for item in queryset.select_related("user", "user__facility").values(
*log_info["db_columns"]
):
writer.writerow(map_object(item))
yield

f.seek(0)
file = file_storage.save(filename, f)

try:
# If the file is local, we can get the path
logger.info("File saved - Path: {}".format(file_storage.path(file)))
except NotImplementedError:
# But if path is not implemented, we assume we can get the URL
logger.info("File saved - Path: {}".format(file_storage.url(file)))
8 changes: 8 additions & 0 deletions kolibri/deployment/default/settings/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -190,6 +190,14 @@
DEFAULT_AUTO_FIELD = "django.db.models.AutoField"


# File Storage Backend
# https://docs.djangoproject.com/en/3.2/ref/files/storage/

# Nothing is configured here when the DEFAULT_FILE_STORAGE environment
# variable is already set to a non-empty value.
if not os.environ.get("DEFAULT_FILE_STORAGE"):
# Select the GCS-backed storage class only when the FileStorage /
# STORAGE_BACKEND option is exactly "gcs".
if conf.OPTIONS["FileStorage"]["STORAGE_BACKEND"] == "gcs":
DEFAULT_FILE_STORAGE = "kolibri.utils.file_storage.KolibriFileStorage"
# Bucket name comes from GCS_BUCKET_NAME; an unset or empty value falls back
# to the hard-coded name on the right-hand side.
# NOTE(review): the fallback looks like a development bucket - confirm it
# should remain the default outside development.
BUCKET_NAME = os.getenv("GCS_BUCKET_NAME") or "kdp-csv-reporting-develop"

# Internationalization
# https://docs.djangoproject.com/en/3.2/topics/i18n/

Expand Down
27 changes: 12 additions & 15 deletions kolibri/plugins/facility/views.py
Original file line number Diff line number Diff line change
@@ -1,9 +1,9 @@
import io
import json
import os
from datetime import datetime as dt

from django.core.exceptions import PermissionDenied
from django.core.files.storage import DefaultStorage
from django.http import Http404
from django.http import HttpResponse
from django.http.response import FileResponse
Expand Down Expand Up @@ -187,34 +187,31 @@ def download_csv_file(request, csv_type, facility_id):
).replace("-", "_"),
}

file_storage = DefaultStorage()

if csv_type in CSV_EXPORT_FILENAMES.keys():
if csv_type == "user":
filepath = os.path.join(
conf.KOLIBRI_HOME,
"log_export",
CSV_EXPORT_FILENAMES[csv_type].format(facility.name, facility.id[:4]),
filename = CSV_EXPORT_FILENAMES[csv_type].format(
facility.name, facility.id[:4]
)
else:
log_request = _get_log_request(csv_type, facility_id)
if log_request:
start = log_request.selected_start_date.isoformat()
end = log_request.selected_end_date.isoformat()
filepath = os.path.join(
conf.KOLIBRI_HOME,
"log_export",
CSV_EXPORT_FILENAMES[csv_type].format(
facility.name, facility.id[:4], start[:10], end[:10]
),

filename = CSV_EXPORT_FILENAMES[csv_type].format(
facility.name, facility.id[:4], start[:10], end[:10]
)
else:
filepath = None
filename = None

# if the file does not exist on disk, return a 404
if filepath is None or not os.path.exists(filepath):
if not file_storage.exists(filename):
raise Http404("There is no csv export file for {} available".format(csv_type))

# generate a file response
response = FileResponse(io.open(filepath, "rb"))
response = FileResponse(file_storage.open(filename, "rb"))
# set the content-type by guessing from the filename
response.headers["Content-Type"] = "text/csv"

Expand All @@ -234,6 +231,6 @@ def download_csv_file(request, csv_type, facility_id):
translation.deactivate()

# set the content-length to the file size
response.headers["Content-Length"] = os.path.getsize(filepath)
response.headers["Content-Length"] = file_storage.size(filename)

return response
9 changes: 9 additions & 0 deletions kolibri/utils/file_storage.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
from django.conf import settings
from storages.backends.gcloud import GoogleCloudStorage # noqa

# https://django-storages.readthedocs.io/en/latest/backends/gcloud.html#google-cloud-storage


# File storage backend built on django-storages' GoogleCloudStorage, with the
# ACL and the target bucket fixed as class attributes.
class KolibriFileStorage(GoogleCloudStorage):
# NOTE(review): "publicRead" presumably makes every saved object readable
# without authentication - confirm that is intended for the files stored here.
default_acl = "publicRead"
# Looked up once, when this module is imported.
# NOTE(review): presumably fails at import time if the BUCKET_NAME setting is
# not defined - verify every configuration that selects this class sets it.
bucket_name = settings.BUCKET_NAME
Loading
Loading