Skip to content

Commit

Permalink
Merge pull request #8003 from rtibbles/utf-8part2
Browse files Browse the repository at this point in the history
Fix utf-8 encoding issues for all Python CSV exports
  • Loading branch information
rtibbles authored Apr 14, 2021
2 parents f842833 + 47d1c8f commit 3cb33f2
Show file tree
Hide file tree
Showing 9 changed files with 49 additions and 51 deletions.
8 changes: 2 additions & 6 deletions kolibri/core/auth/csv_utils.py
Original file line number Diff line number Diff line change
@@ -1,10 +1,8 @@
from __future__ import unicode_literals

import csv
import io
import logging
import os
import sys
from collections import OrderedDict
from functools import partial

Expand All @@ -18,6 +16,7 @@
from kolibri.core.auth.models import Facility
from kolibri.core.auth.models import FacilityUser
from kolibri.core.query import SQCount
from kolibri.core.utils.csv import open_csv_for_writing


logger = logging.getLogger(__name__)
Expand Down Expand Up @@ -186,10 +185,7 @@ def csv_file_generator(facility, filepath, overwrite=True, demographic=False):
column for column in db_columns if demographic or column not in DEMO_FIELDS
)

if sys.version_info[0] < 3:
csv_file = io.open(filepath, "wb")
else:
csv_file = io.open(filepath, "w", newline="")
csv_file = open_csv_for_writing(filepath)

with csv_file as f:
writer = csv.DictWriter(f, header_labels)
Expand Down
8 changes: 2 additions & 6 deletions kolibri/core/auth/management/commands/bulkexportusers.py
Original file line number Diff line number Diff line change
@@ -1,9 +1,7 @@
import csv
import io
import logging
import ntpath
import os
import sys
from collections import OrderedDict
from functools import partial
from tempfile import mkstemp
Expand All @@ -28,6 +26,7 @@
from kolibri.core.query import GroupConcatSubquery
from kolibri.core.tasks.management.commands.base import AsyncCommand
from kolibri.core.tasks.utils import get_current_job
from kolibri.core.utils.csv import open_csv_for_writing
from kolibri.utils import conf

try:
Expand Down Expand Up @@ -161,10 +160,7 @@ def csv_file_generator(facility, filepath, overwrite=True):

header_labels = labels.values()

if sys.version_info[0] < 3:
csv_file = io.open(filepath, "wb")
else:
csv_file = io.open(filepath, "w", newline="")
csv_file = open_csv_for_writing(filepath)

with csv_file as f:
writer = csv.DictWriter(f, header_labels)
Expand Down
8 changes: 5 additions & 3 deletions kolibri/core/auth/management/commands/bulkimportusers.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@
from kolibri.core.auth.models import Membership
from kolibri.core.tasks.management.commands.base import AsyncCommand
from kolibri.core.tasks.utils import get_current_job
from kolibri.core.utils.csv import open_csv_for_reading

try:
FileNotFoundError
Expand Down Expand Up @@ -465,8 +466,8 @@ def csv_values_validation(self, reader, header_translation):
)

def csv_headers_validation(self, filepath):
# open using default OS encoding
with open(filepath) as f:
csv_file = open_csv_for_reading(filepath)
with csv_file as f:
header = next(csv.reader(f, strict=True))
has_header = False
self.header_translation = {
Expand Down Expand Up @@ -852,7 +853,8 @@ def handle_async(self, *args, **options):
self.exit_if_error()
self.progress_update(1) # state=csv_headers
try:
with open(filepath) as f:
csv_file = open_csv_for_reading(filepath)
with csv_file as f:
reader = csv.DictReader(f, strict=True)
per_line_errors, classes, users, roles = self.csv_values_validation(
reader, self.header_translation
Expand Down
9 changes: 5 additions & 4 deletions kolibri/core/auth/management/commands/importusers.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@
from kolibri.core.auth.models import Classroom
from kolibri.core.auth.models import Facility
from kolibri.core.auth.models import FacilityUser
from kolibri.core.utils.csv import open_csv_for_reading

logger = logging.getLogger(__name__)

Expand Down Expand Up @@ -196,8 +197,8 @@ def handle(self, *args, **options):

fieldnames = input_fields + tuple(val for val in labels.values())

# open using default OS encoding
with open(options["filepath"]) as f:
csv_file = open_csv_for_reading(options["filepath"])
with csv_file as f:
header = next(csv.reader(f, strict=True))
has_header = False
if all(col in fieldnames for col in header):
Expand All @@ -212,8 +213,8 @@ def handle(self, *args, **options):
"Mix of valid and invalid header labels found in first row"
)

# open using default OS encoding
with open(options["filepath"]) as f:
csv_file = open_csv_for_reading(options["filepath"])
with csv_file as f:
if has_header:
reader = csv.DictReader(f, strict=True)
else:
Expand Down
7 changes: 2 additions & 5 deletions kolibri/core/auth/test/test_bulk_import.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,4 @@
import csv
import io
import sys
import tempfile
from uuid import uuid4
Expand All @@ -15,6 +14,7 @@
from kolibri.core.auth.constants import role_kinds
from kolibri.core.auth.models import Classroom
from kolibri.core.auth.models import FacilityUser
from kolibri.core.utils.csv import open_csv_for_writing

if sys.version_info[0] < 3:
from cStringIO import StringIO
Expand Down Expand Up @@ -118,10 +118,7 @@ def setUp(self):
def create_csv(self, filepath, rows, remove_uuid=False):
header_labels = list(labels.values())

if sys.version_info[0] < 3:
csv_file = io.open(filepath, "wb")
else:
csv_file = io.open(filepath, "w", newline="")
csv_file = open_csv_for_writing(filepath)

with csv_file as f:
writer = csv.writer(f)
Expand Down
22 changes: 5 additions & 17 deletions kolibri/core/auth/test/test_user_export.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,6 @@
Also tests whether the users with permissions can create logs.
"""
import csv
import sys
import tempfile

from django.core.management import call_command
Expand All @@ -21,6 +20,7 @@
from kolibri.core.auth.models import Classroom
from kolibri.core.auth.models import FacilityUser
from kolibri.core.auth.models import LearnerGroup
from kolibri.core.utils.csv import open_csv_for_reading


users = [
Expand Down Expand Up @@ -56,10 +56,7 @@ def test_csv_export_with_demographics(self):
call_command(
"exportusers", output_file=filepath, overwrite=True, demographic=True
)
if sys.version_info[0] < 3:
csv_file = open(filepath, "rb")
else:
csv_file = open(filepath, "r", newline="")
csv_file = open_csv_for_reading(filepath)
with csv_file as f:
results = list(row for row in csv.DictReader(f))

Expand Down Expand Up @@ -91,10 +88,7 @@ def test_csv_export_no_demographics(self):
call_command(
"exportusers", output_file=filepath, overwrite=True, demographic=False
)
if sys.version_info[0] < 3:
csv_file = open(filepath, "rb")
else:
csv_file = open(filepath, "r", newline="")
csv_file = open_csv_for_reading(filepath)
with csv_file as f:
results = list(row for row in csv.DictReader(f))

Expand All @@ -115,10 +109,7 @@ def test_csv_export_user_in_multiple_classes(self):
call_command(
"exportusers", output_file=filepath, overwrite=True, demographic=True
)
if sys.version_info[0] < 3:
csv_file = open(filepath, "rb")
else:
csv_file = open(filepath, "r", newline="")
csv_file = open_csv_for_reading(filepath)
with csv_file as f:
results = list(row for row in csv.DictReader(f))

Expand All @@ -136,10 +127,7 @@ def test_csv_export_user_in_one_class_one_group(self):
call_command(
"exportusers", output_file=filepath, overwrite=True, demographic=True
)
if sys.version_info[0] < 3:
csv_file = open(filepath, "rb")
else:
csv_file = open(filepath, "r", newline="")
csv_file = open_csv_for_reading(filepath)
with csv_file as f:
results = list(row for row in csv.DictReader(f))

Expand Down
17 changes: 12 additions & 5 deletions kolibri/core/auth/test/test_user_import.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,8 @@
from kolibri.core.auth.constants.demographics import MALE
from kolibri.core.auth.constants.demographics import NOT_SPECIFIED
from kolibri.core.auth.csv_utils import labels
from kolibri.core.utils.csv import open_csv_for_reading
from kolibri.core.utils.csv import open_csv_for_writing


class UserImportTestCase(TestCase):
Expand Down Expand Up @@ -163,7 +165,8 @@ def tearDown(self):
os.remove(self.csvpath)

def importFromRows(self, *args):
with open(self.csvpath, "w") as f:
csv_file = open_csv_for_writing(self.csvpath)
with csv_file as f:
writer = csv.writer(f)
writer.writerows([a for a in args])

Expand Down Expand Up @@ -285,10 +288,12 @@ def test_import_from_export_missing_headers(self):
"exportusers", output_file=self.csvpath, overwrite=True, demographic=True
)
cols_to_remove = ["Facility id", "Gender"]
with open(self.csvpath, "r") as source:
csv_file = open_csv_for_reading(self.csvpath)
with csv_file as source:
reader = csv.DictReader(source)
rows = list(row for row in reader)
with open(self.csvpath, "w") as result:
csv_file = open_csv_for_writing(self.csvpath)
with csv_file as result:
writer = csv.DictWriter(
result,
tuple(
Expand All @@ -314,10 +319,12 @@ def test_import_from_export_mixed_headers(self):
"exportusers", output_file=self.csvpath, overwrite=True, demographic=True
)
cols_to_replace = {"Facility id": "facility", "Gender": "gender"}
with open(self.csvpath, "r") as source:
csv_file = open_csv_for_reading(self.csvpath)
with csv_file as source:
reader = csv.DictReader(source)
rows = list(row for row in reader)
with open(self.csvpath, "w") as result:
csv_file = open_csv_for_writing(self.csvpath)
with csv_file as result:
writer = csv.DictWriter(
result,
tuple(
Expand Down
7 changes: 2 additions & 5 deletions kolibri/core/logger/csv_export.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,6 @@
import logging
import math
import os
import sys
from collections import OrderedDict

from django.core.cache import cache
Expand All @@ -23,6 +22,7 @@
from kolibri.core.auth.models import Facility
from kolibri.core.content.models import ChannelMetadata
from kolibri.core.content.models import ContentNode
from kolibri.core.utils.csv import open_csv_for_writing
from kolibri.utils import conf


Expand Down Expand Up @@ -151,10 +151,7 @@ def csv_file_generator(facility, log_type, filepath, overwrite=False):
if log_type == "summary" or label != "completion_timestamp"
)

if sys.version_info[0] < 3:
csv_file = io.open(filepath, "wb")
else:
csv_file = io.open(filepath, "w", newline="", encoding="utf-8")
csv_file = open_csv_for_writing(filepath)

with csv_file as f:
writer = csv.DictWriter(f, header_labels)
Expand Down
14 changes: 14 additions & 0 deletions kolibri/core/utils/csv.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
import io
import sys


def open_csv_for_writing(filepath):
    """
    Open ``filepath`` for writing CSV data and return the file object.

    On Python 2 the file is opened in binary mode. On Python 3 it is opened
    in text mode with newline translation disabled and the "utf-8-sig"
    encoding, which emits a UTF-8 byte order mark at the start of the file.

    The caller is responsible for closing the returned file, typically by
    using it as a context manager.
    """
    running_python2 = sys.version_info[0] < 3
    if running_python2:
        mode, text_options = "wb", {}
    else:
        # newline="" leaves the line endings produced by the writer untouched.
        mode, text_options = "w", {"newline": "", "encoding": "utf-8-sig"}
    return io.open(filepath, mode, **text_options)


def open_csv_for_reading(filepath):
    """
    Open ``filepath`` for reading CSV data and return the file object.

    On Python 2 the file is opened in binary mode. On Python 3 it is opened
    in text mode with newline translation disabled and the "utf-8-sig"
    encoding, which decodes UTF-8 and drops a leading byte order mark when
    one is present.

    The caller is responsible for closing the returned file, typically by
    using it as a context manager.
    """
    major_version = sys.version_info[0]
    if major_version >= 3:
        # newline="" hands line endings to the consumer exactly as stored.
        text_options = {"newline": "", "encoding": "utf-8-sig"}
        return io.open(filepath, "r", **text_options)
    return io.open(filepath, "rb")

0 comments on commit 3cb33f2

Please sign in to comment.