Skip to content

Commit

Permalink
Merge pull request #4827 from aronasorman/extract-subtitles-and-asses…
Browse files Browse the repository at this point in the history
…sment-items

Extract subtitles and assessment items into the right directories
  • Loading branch information
rtibbles committed Feb 18, 2016
2 parents ff1d0a8 + a637065 commit 653cef9
Show file tree
Hide file tree
Showing 23 changed files with 99 additions and 434 deletions.
58 changes: 56 additions & 2 deletions kalite/distributed/management/commands/retrievecontentpack.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,8 +13,9 @@

from fle_utils.general import ensure_dir

from kalite.contentload import settings as content_settings
from kalite.i18n.base import lcode_to_django_lang, get_po_filepath, get_locale_path, \
update_jsi18n_file
update_jsi18n_file, get_srt_path as get_subtitle_path
from kalite.topic_tools import settings
from kalite.updates.management.commands.classes import UpdatesStaticCommand

Expand Down Expand Up @@ -82,7 +83,9 @@ def process_content_pack(self, zf, lang):
extract_catalog_files(zf, lang)
update_jsi18n_file(lang)
extract_content_db(zf, lang)
extract_content_pack_metadata(zf, lang)
extract_subtitles(zf, lang)
extract_content_pack_metadata(zf, lang) # always extract to the en lang
extract_assessment_items(zf, "en")

self.next_stage(_("Looking for available content items."))
call_command("annotate_content_items", language=lang)
Expand Down Expand Up @@ -140,3 +143,54 @@ def extract_content_db(zf, lang):
with open(content_db_path, "wb") as f:
dbfobj = zf.open("content.db")
shutil.copyfileobj(dbfobj, f)


def extract_subtitles(zf, lang):
    """Copy every subtitle file found in a content pack into the subtitle
    directory for ``lang``.

    Args:
        zf: an opened archive exposing ``namelist()`` and ``open(name)``
            (e.g. a ``zipfile.ZipFile``).
        lang: language code passed to ``get_subtitle_path`` to locate the
            destination directory.

    Files are flattened: only the final path component of each archive
    member is kept, so ``subtitles/hotdog.vtt`` is written as ``hotdog.vtt``.
    """
    subtitle_dest_dir = get_subtitle_path(lang_code=lang)
    subtitle_zip_dir = "subtitles/"

    ensure_dir(subtitle_dest_dir)

    for member in zf.namelist():
        if subtitle_zip_dir not in member:
            continue

        # files inside zipfiles may come with leading directories in their
        # names, like subtitles/hotdog.vtt. We'll only want the actual filename
        # (hotdog.vtt) when extracting as that's what KA Lite expects.
        subtitle_filename = os.path.basename(member)

        # Archives can list the directory itself ("subtitles/"), which has an
        # empty basename. There is nothing to copy for such an entry, and
        # opening the destination directory for writing would fail.
        if not subtitle_filename:
            continue

        subtitle_dest_path = os.path.join(subtitle_dest_dir, subtitle_filename)

        subtitle_fileobj = zf.open(member)
        try:
            # Archive members are read as raw bytes, so write in binary mode;
            # text mode would translate newlines on Windows and corrupt the
            # copy. This matches how extract_content_db writes content.db.
            with open(subtitle_dest_path, "wb") as dest_fileobj:
                shutil.copyfileobj(subtitle_fileobj, dest_fileobj)
        finally:
            # Release the archive member handle even if the copy fails.
            subtitle_fileobj.close()


def extract_assessment_items(zf, lang):
    """Copy assessment resources from a content pack into the "khan" channel
    folder under ``content_settings.ASSESSMENT_ITEM_ROOT``.

    Args:
        zf: an opened archive exposing ``namelist()`` and ``open(name)``
            (e.g. a ``zipfile.ZipFile``).
        lang: accepted for signature parity with the other ``extract_*``
            helpers; it is not used, because the destination does not depend
            on the language.

    Each resource is written to ``<root>/khan/<first 3 chars of filename>/
    <filename>``; any leading directories in the archive member name are
    discarded.
    """
    assessment_zip_dir = "assessment_resources/"

    channel = "khan"

    assessment_dest_dir = os.path.join(content_settings.ASSESSMENT_ITEM_ROOT, channel)

    ensure_dir(assessment_dest_dir)

    for item in zf.namelist():
        if assessment_zip_dir not in item:
            continue

        # files inside zipfiles may come with leading directories in their
        # names, like assessment_resources/abcdef.png. Only the actual
        # filename (abcdef.png) is kept when extracting.
        filename = os.path.basename(item)

        # Archives can list the directory itself ("assessment_resources/"),
        # which has an empty basename. There is nothing to copy for such an
        # entry, and opening a directory for writing would fail.
        if not filename:
            continue

        # Resources are bucketed into subfolders named after the first three
        # characters of the filename.
        subfolder = filename[:3]
        assessment_subfolder = os.path.join(assessment_dest_dir, subfolder)

        ensure_dir(assessment_subfolder)

        assessment_dest_path = os.path.join(assessment_subfolder, filename)

        zip_item_fileobj = zf.open(item)
        try:
            # Archive members are read as raw bytes, so write in binary mode;
            # text mode would translate newlines on Windows and corrupt
            # binary resources.
            with open(assessment_dest_path, "wb") as item_fileobj:
                shutil.copyfileobj(zip_item_fileobj, item_fileobj)
        finally:
            # Release the archive member handle even if the copy fails.
            zip_item_fileobj.close()
40 changes: 21 additions & 19 deletions kalite/distributed/management/commands/setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
- Run migrations
- Find and relocate obsolete user and data files
- if interactive:
- Download and unpack assessment items
- Download and unpack the english content pack, containing assessment items
- Create super user account
- Run 'kalite start'
"""
Expand All @@ -29,19 +29,17 @@

import kalite
from kalite.contentload.settings import KHAN_ASSESSMENT_ITEM_ROOT, OLD_ASSESSMENT_ITEMS_LOCATION
from kalite.topic_tools.settings import CHANNEL

from fle_utils.config.models import Settings
from fle_utils.general import get_host_name
from fle_utils.platforms import is_windows
from kalite.version import VERSION, SHORTVERSION
from kalite.distributed.management.commands.retrievecontentpack import CONTENT_PACK_URL_TEMPLATE
from kalite.facility.models import Facility
from kalite.version import VERSION, SHORTVERSION
from securesync.models import Device
import warnings


# for extracting assessment item resources
ASSESSMENT_ITEMS_ZIP_URL = "https://learningequality.org/downloads/ka-lite/{version}/content/{channel}_assessment.zip".format(version=SHORTVERSION, channel=CHANNEL)
CONTENTPACK_URL = CONTENT_PACK_URL_TEMPLATE.format(version=SHORTVERSION, code="en")


def raw_input_yn(prompt):
Expand Down Expand Up @@ -139,7 +137,7 @@ def validate_filename(filename):
return False

def find_recommended_file():
filename_guess = "{channel}_assessment.zip".format(channel=CHANNEL)
filename_guess = "en.zip"
curdir = os.path.abspath(os.curdir)
pardir = os.path.abspath(os.path.join(curdir, os.pardir))
while curdir != pardir:
Expand All @@ -152,8 +150,9 @@ def find_recommended_file():
return ""

recommended_filename = find_recommended_file()
prompt = "Please enter the filename of the assessment items package you have downloaded (%s): " % recommended_filename
prompt = "Please enter the filename of the content pack you have downloaded (%s): " % recommended_filename
filename = raw_input(prompt)
filename = os.path.expanduser(filename)
if not filename:
filename = recommended_filename
while not validate_filename(filename):
Expand Down Expand Up @@ -204,12 +203,12 @@ class Command(BaseCommand):
action='store_true',
dest='force-assessment-item-dl',
default=False,
help='Downloads assessment items from the url specified by ASSESSMENT_ITEMS_ZIP_URL, without interaction'),
help='Downloads content pack from the url specified by CONTENTPACK_URL, without interaction'),
make_option('-i', '--no-assessment-items',
action='store_true',
dest='no-assessment-items',
default=False,
help='Skip all steps associating with assessment item downloading or the assessment item database'),
help='Skip all steps associating with content pack downloading or the content database'),
make_option('-g', '--git-migrate',
action='store',
dest='git_migrate_path',
Expand Down Expand Up @@ -387,12 +386,12 @@ def handle(self, *args, **options):
call_command("migrate", merge=True, verbosity=options.get("verbosity"))
Settings.set("database_version", VERSION)

# download assessment items
# download the english content pack
# This can take a long time and lead to Travis stalling. None of this
# is required for tests, and does not apply to the central server.
if options.get("no-assessment-items", False):

logging.warning("Skipping assessment item downloading and configuration.")
logging.warning("Skipping content pack downloading and configuration.")

else:

Expand All @@ -411,7 +410,7 @@ def handle(self, *args, **options):

if writable_assessment_items and options['force-assessment-item-dl']:
call_command(
"unpack_assessment_zip", ASSESSMENT_ITEMS_ZIP_URL)
"retrievecontentpack", "download", "en")
elif options['force-assessment-item-dl']:
raise RuntimeError(
"Got force-assessment-item-dl but directory not writable")
Expand All @@ -423,20 +422,23 @@ def handle(self, *args, **options):
print(
"If you have already downloaded the assessment items package, you can specify the file in the next step.")
print("Otherwise, we will download it from {url}.".format(
url=ASSESSMENT_ITEMS_ZIP_URL))
url=CONTENTPACK_URL))

if raw_input_yn("Do you wish to download the assessment items package now?"):
ass_item_filename = ASSESSMENT_ITEMS_ZIP_URL
elif raw_input_yn("Have you already downloaded the assessment items package?"):
if raw_input_yn("Do you wish to download the content pack now?"):
ass_item_filename = CONTENTPACK_URL
retrieval_method = "download"
elif raw_input_yn("Have you already downloaded the content pack?"):
ass_item_filename = get_assessment_items_filename()
retrieval_method = "local"
else:
ass_item_filename = None
retrieval_method = "local"

if not ass_item_filename:
logging.warning(
"No assessment items package file given. You will need to download and unpack it later.")
"No content pack given. You will need to download and unpack it later.")
else:
call_command("unpack_assessment_zip", ass_item_filename)
call_command("retrievecontentpack", retrieval_method, "en", ass_item_filename)

elif options['interactive']:
logging.warning(
Expand Down
2 changes: 1 addition & 1 deletion kalite/distributed/templates/distributed/base.html
Original file line number Diff line number Diff line change
Expand Up @@ -60,7 +60,7 @@

STATIC_URL : "{% static "" %}",

ALL_ASSESSMENT_ITEMS_URL : "{% url 'api_dispatch_list' resource_name='assessment_item' %}",
ALL_ASSESSMENT_ITEMS_URL : "{% url 'assessment_item' assessment_item='' %}",
GET_VIDEO_LOGS_URL : "{% url 'api_dispatch_list' resource_name='videolog' %}",
GET_EXERCISE_LOGS_URL : "{% url 'api_dispatch_list' resource_name='exerciselog' %}",
GET_CONTENT_LOGS_URL : "{% url 'api_dispatch_list' resource_name='contentlog' %}",
Expand Down
2 changes: 0 additions & 2 deletions kalite/distributed/tests/browser_tests/__init__.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,4 @@
from .base import *
# from .coachreports import *
from .control_panel import *
from .distributed import *
from .knowledge_map import *
from .language_packs import *
2 changes: 1 addition & 1 deletion kalite/distributed/tests/browser_tests/control_panel.py
Original file line number Diff line number Diff line change
Expand Up @@ -84,7 +84,7 @@ def test_ungrouped_number_displays_correctly(self):
"""
Ungrouped # of students wasn't displaying correctly, see: https://github.com/learningequality/ka-lite/pull/2230
In particular it seems to have only occurred when a non-english language was set, so this test tried to
mock a language pack download -- but that makes the test dependent on languagepackdownload.py details :(
mock a language pack download
"""
facility = self.facility
params = {
Expand Down
57 changes: 0 additions & 57 deletions kalite/distributed/tests/browser_tests/language_packs.py

This file was deleted.

2 changes: 0 additions & 2 deletions kalite/i18n/tests/__init__.py
Original file line number Diff line number Diff line change
@@ -1,2 +0,0 @@
from base import *
from browser_tests import *
47 changes: 0 additions & 47 deletions kalite/i18n/tests/base.py

This file was deleted.

30 changes: 0 additions & 30 deletions kalite/i18n/tests/browser_tests.py

This file was deleted.

1 change: 0 additions & 1 deletion kalite/i18n/tests/create_dummy_language_pack_tests.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,3 @@
import accenting
import requests
import zipfile
from cStringIO import StringIO
Expand Down
Binary file removed kalite/i18n/tests/testzips/de.zip
Binary file not shown.
Binary file removed kalite/i18n/tests/testzips/it.zip
Binary file not shown.
Binary file removed kalite/i18n/tests/testzips/pt-BR.zip
Binary file not shown.
Loading

0 comments on commit 653cef9

Please sign in to comment.