Merge pull request #41 from jayoshih/pressure-on

Pressurecooker Integration
learningequality · Jan 12, 2017 · d97da95 · d97da95
2 parents 6f8d120 + 55329fc
commit d97da95
Show file tree

Hide file tree

Showing 11 changed files with 202 additions and 49 deletions.
diff --git a/ricecooker/__init__.py b/ricecooker/__init__.py
@@ -2,4 +2,4 @@
 
 __author__ = 'Learning Equality'
 __email__ = '[email protected]'
-__version__ = '0.3.13'
+__version__ = '0.3.14'
diff --git a/ricecooker/__main__.py b/ricecooker/__main__.py
@@ -1,5 +1,5 @@
 
-"""Usage: ricecooker uploadchannel [-huv] <file_path> [--warn] [--token=<t>] [--resume [--step=<step>] | --reset] [--prompt] [--publish] [[OPTIONS] ...]
+"""Usage: ricecooker uploadchannel [-huv] <file_path> [--warn] [--compress] [--token=<t>] [--resume [--step=<step>] | --reset] [--prompt] [--publish] [[OPTIONS] ...]
 
 Arguments:
   file_path        Path to file with channel data
@@ -9,6 +9,7 @@
   -v               Verbose mode
   -u               Re-download files from file paths
   --warn           Print out warnings to stderr
+  --compress       Compress high resolution videos to low resolution videos
   --token=<t>      Authorization token (can be token or path to file with token) [default: #]
   --resume         Resume from ricecooker step (cannot be used with --reset flag)
   --step=<step>    Step to resume progress from (must be used with --resume flag) [default: last]
@@ -23,6 +24,7 @@
   CONSTRUCT_CHANNEL:    Resume with call to construct channel
   CREATE_TREE:          Resume at set tree relationships
   DOWNLOAD_FILES:       Resume at beginning of download process
+  COMPRESS_FILES:       Resume at video compression step
   GET_FILE_DIFF:        Resume at call to get file diff from Kolibri Studio
   START_UPLOAD:         Resume at beginning of uploading files to Kolibri Studio
   UPLOADING_FILES:      Resume at last upload request
@@ -68,4 +70,5 @@
                   prompt=arguments['--prompt'],
                   publish=arguments['--publish'],
                   warnings=arguments['--warn'],
+                  compress=arguments['--compress'],
                   **kwargs)
diff --git a/ricecooker/classes/nodes.py b/ricecooker/classes/nodes.py
@@ -111,6 +111,7 @@ class Channel(Node):
             description (str): description of the channel (optional)
             thumbnail (str): file path or url of channel's thumbnail (optional)
     """
+    thumbnail_preset = format_presets.CHANNEL_THUMBNAIL
     def __init__(self, channel_id, domain, title, description=None, thumbnail=None):
         # Map parameters to model variables
         self.domain = domain
@@ -290,6 +291,7 @@ class Video(ContentNode):
             thumbnail (str): local path or url to thumbnail image (optional)
     """
     default_preset = format_presets.VIDEO_HIGH_RES
+    thumbnail_preset = format_presets.VIDEO_THUMBNAIL
     def __init__(self, id, title, files, author="", description="", transcode_to_lower_resolutions=False, derive_thumbnail=False, license=None, subtitle=None, preset=None, thumbnail=None):
         self.kind = content_kinds.VIDEO
         self.derive_thumbnail = derive_thumbnail
@@ -358,7 +360,7 @@ class Audio(ContentNode):
             license (str): content's license based on le_utils.constants.licenses (optional)
             thumbnail (str): local path or url to thumbnail image (optional)
     """
-
+    thumbnail_preset = format_presets.AUDIO_THUMBNAIL
     default_preset = format_presets.AUDIO
     def __init__(self, id, title, files, author="", description="", license=None, subtitle=None, thumbnail=None):
         self.kind = content_kinds.AUDIO
@@ -404,6 +406,7 @@ class Document(ContentNode):
             thumbnail (str): local path or url to thumbnail image (optional)
     """
     default_preset = format_presets.DOCUMENT
+    thumbnail_preset = format_presets.DOCUMENT_THUMBNAIL
     def __init__(self, id, title, files, author="", description="", license=None, thumbnail=None):
         self.kind = content_kinds.DOCUMENT
         super(Document, self).__init__(id, title, description=description, author=author, license=license, files=files, thumbnail=thumbnail)
@@ -450,6 +453,7 @@ class Exercise(ContentNode):
             thumbnail (str): local path or url to thumbnail image (optional)
     """
     default_preset = format_presets.EXERCISE
+    thumbnail_preset = format_presets.EXERCISE_THUMBNAIL
     def __init__(self, id, title, files, author="", description="", license=None, exercise_data=None, thumbnail=None):
         self.kind = content_kinds.EXERCISE
         self.questions = []
@@ -551,7 +555,7 @@ class HTML5App(ContentNode):
     """
 
     default_preset = format_presets.HTML5_ZIP
-
+    thumbnail_preset = format_presets.HTML5_THUMBNAIL
     def __init__(self, id, title, files, author="", description="", license=None, thumbnail=None):
         self.kind = content_kinds.HTML5
         files = [] if files is None else files

diff --git a/ricecooker/classes/questions.py b/ricecooker/classes/questions.py
@@ -119,7 +119,7 @@ def parse_html(self, text):
                 text (str): text to parse
             Returns: string with properly formatted images
         """
-        bs = BeautifulSoup(text, "html.parser")
+        bs = BeautifulSoup(text, "html5lib")
         file_reg = re.compile(FILE_REGEX, flags=re.IGNORECASE)
         tags = bs.findAll('img')
 

diff --git a/ricecooker/commands.py b/ricecooker/commands.py
@@ -11,15 +11,14 @@
 from .managers.tree import ChannelManager
 from importlib.machinery import SourceFileLoader
 
-
 # Fix to support Python 2.x.
 # http://stackoverflow.com/questions/954834/how-do-i-use-raw-input-in-python-3
 try:
     input = raw_input
 except NameError:
     pass
 
-def uploadchannel(path, verbose=False, update=False, resume=False, reset=False, step=Status.LAST.name, token="#", prompt=False, publish=False, warnings=False, **kwargs):
+def uploadchannel(path, verbose=False, update=False, resume=False, reset=False, step=Status.LAST.name, token="#", prompt=False, publish=False, warnings=False, compress=False, **kwargs):
     """ uploadchannel: Upload channel to Kolibri Studio server
         Args:
             path (str): path to file containing construct_channel method
@@ -32,6 +31,7 @@ def uploadchannel(path, verbose=False, update=False, resume=False, reset=False,
             prompt (bool): indicates whether to prompt user to open channel when done (optional)
             publish (bool): indicates whether to automatically publish channel (optional)
             warnings (bool): indicates whether to print out warnings (optional)
+            compress (bool): indicates whether to compress larger files (optional)
             kwargs (dict): keyword arguments to pass to sushi chef (optional)
         Returns: (str) link to access newly created channel
     """
@@ -41,6 +41,7 @@ def uploadchannel(path, verbose=False, update=False, resume=False, reset=False,
     config.WARNING = warnings
     config.TOKEN = token
     config.UPDATE = update
+    config.COMPRESS = compress
 
     # Get domain to upload to
     config.init_file_mapping_store()
@@ -93,6 +94,10 @@ def uploadchannel(path, verbose=False, update=False, resume=False, reset=False,
     if config.PROGRESS_MANAGER.get_status_val() <= Status.DOWNLOAD_FILES.value:
         config.PROGRESS_MANAGER.set_files(*process_tree_files(tree))
 
+    # Compress files if they haven't been compressed already
+    if config.PROGRESS_MANAGER.get_status_val() <= Status.COMPRESS_FILES.value:
+        config.PROGRESS_MANAGER.set_compressed_files(*compress_tree_files(tree))
+
     # Set download manager in case steps were skipped
     config.DOWNLOADER.files = config.PROGRESS_MANAGER.files_downloaded
     config.DOWNLOADER.failed_files = config.PROGRESS_MANAGER.files_failed
@@ -219,6 +224,21 @@ def process_tree_files(tree):
         sys.stderr.write("\n")
     return config.DOWNLOADER.get_files(), config.DOWNLOADER.get_file_mapping(), config.DOWNLOADER.failed_files
 
+def compress_tree_files(tree):
+    """ compress_tree_files: Compress files from nodes (only when config.COMPRESS is set)
+        Args:
+            tree (ChannelManager): manager to handle communication to Kolibri Studio
+        Returns: (files, file mapping, failed files) as reported by config.DOWNLOADER
+    """
+    if config.COMPRESS:
+        if config.VERBOSE:
+            sys.stderr.write("\nCompressing files...")
+        tree.compress_tree(tree.channel)
+        config.set_file_store(config.DOWNLOADER.file_store)
+        if config.VERBOSE:
+            sys.stderr.write("\n")
+    return config.DOWNLOADER.get_files(), config.DOWNLOADER.get_file_mapping(), config.DOWNLOADER.failed_files
+
 def get_file_diff(tree):
     """ get_file_diff: Download files from nodes
         Args:
@@ -242,6 +262,7 @@ def upload_files(tree, file_diff):
     """
     # Upload new files to CC
     tree.upload_files(file_diff)
+    tree.reattempt_upload_fails()
     if config.VERBOSE:
         sys.stderr.write("\n")
     return file_diff

diff --git a/ricecooker/config.py b/ricecooker/config.py
@@ -7,6 +7,7 @@
 WARNING = False
 UPDATE = False
 VERBOSE = False
+COMPRESS = False
 TOKEN = "#"
 PROGRESS_MANAGER = None
 DOWNLOADER = None

diff --git a/ricecooker/managers/downloader.py b/ricecooker/managers/downloader.py
@@ -8,6 +8,8 @@
 import sys
 import requests
 from enum import Enum
+from pressurecooker.videos import extract_thumbnail_from_video, check_video_resolution, compress_video
+from pressurecooker.encodings import get_base64_encoding, write_base64_to_file
 from requests_file import FileAdapter
 from requests.exceptions import MissingSchema, HTTPError, ConnectionError, InvalidURL, InvalidSchema
 from .. import config
@@ -132,7 +134,7 @@ def download_graphie(self, path, title):
         except (HTTPError, ConnectionError, InvalidURL, InvalidSchema, IOError):
             self.failed_files += [(path,title)]
             return False;
-        except (HTTPError, FileNotFoundError, ConnectionError, InvalidURL, UnicodeDecodeError, InvalidSchema, IOError):
+        except (HTTPError, ConnectionError, InvalidURL, UnicodeDecodeError, UnicodeError, InvalidSchema, IOError):
             self.failed_files += [(path, title)]
             return False
 
@@ -193,10 +195,10 @@ def track_existing_file(self, path):
         data = self.file_store[path]
         if config.VERBOSE:
             sys.stderr.write("\n\tFile {0} already exists (add '-u' flag to update)".format(data['filename']))
-        self.track_file(data['filename'], data['size'],  data['preset'], original_filename=data['original_filename'])
+        self.track_file(data['filename'], data['size'],  data.get('preset'), original_filename=data.get('original_filename'), extracted=data.get("extracted"))
         return self._file_mapping[data['filename']]
 
-    def download_file(self, path, title, default_ext=None, preset=None):
+    def download_file(self, path, title, default_ext=None, preset=None, extracted=False, original_filepath=None):
         """ download_file: downloads file from path
             Args:
                 path (str): local path or url to file to download
@@ -210,10 +212,16 @@ def download_file(self, path, title, default_ext=None, preset=None):
             if exercises.CONTENT_STORAGE_PLACEHOLDER in path:
                 return self._file_mapping[os.path.split(path)[-1]]
 
-            if self.check_downloaded_file(path):
-                return self.track_existing_file(path)
+            if not original_filepath:
+                original_filepath = path
 
-            if config.VERBOSE:
+            if self.check_downloaded_file(original_filepath):
+                return self.track_existing_file(original_filepath)
+
+            if get_base64_encoding(path):
+                return self.convert_base64_to_file(path, title, preset=preset)
+
+            if config.VERBOSE and not extracted:
                 sys.stderr.write("\n\tDownloading {}".format(path))
 
             hash=self.get_hash(path)
@@ -233,8 +241,11 @@ def download_file(self, path, title, default_ext=None, preset=None):
                 if config.VERBOSE:
                     sys.stderr.write("\n\t--- No changes detected on {0}".format(filename))
 
+                if extension == file_formats.MP4:
+                    preset = check_video_resolution(config.get_storage_path(filename))
+
                 # Keep track of downloaded file
-                self.track_file(filename, os.path.getsize(config.get_storage_path(filename)), preset, path)
+                self.track_file(filename, os.path.getsize(config.get_storage_path(filename)), preset, path, extracted=extracted)
                 return self._file_mapping[filename]
 
             # Write file to temporary file
@@ -261,25 +272,30 @@ def download_file(self, path, title, default_ext=None, preset=None):
                 with open(config.get_storage_path(filename), 'wb') as destf:
                     shutil.copyfileobj(tempf, destf)
 
+                # If a video file, check its resolution
+                if extension == file_formats.MP4:
+                    preset = check_video_resolution(config.get_storage_path(filename))
+
                 # Keep track of downloaded file
-                self.track_file(filename, file_size, preset, path)
+                self.track_file(filename, file_size, preset, original_filepath, extracted=extracted)
                 if config.VERBOSE:
                     sys.stderr.write("\n\t--- Downloaded {}".format(filename))
                 return self._file_mapping[filename]
 
         # Catch errors related to reading file path and handle silently
-        except (HTTPError, ConnectionError, InvalidURL, InvalidSchema, IOError):
+        except (HTTPError, ConnectionError, InvalidURL, UnicodeDecodeError, UnicodeError, InvalidSchema, IOError):
             self.failed_files += [(path,title)]
             return False;
 
-    def track_file(self, filename, file_size, preset, path=None, original_filename='[File]'):
+    def track_file(self, filename, file_size, preset, path=None, original_filename='[File]', extracted=False):
         """ track_file: record which file has been downloaded along with metadata
             Args:
                 filename (str): name of file to track
                 file_size (int): size of file
                 preset (str): preset to assign to file
                 path (str): source path of file (optional)
                 original_filename (str): file's original name (optional)
+                extracted (bool): indicates whether file has been extracted automatically (optional)
             Returns: None
         """
         self.files += [filename]
@@ -288,13 +304,13 @@ def track_file(self, filename, file_size, preset, path=None, original_filename='
             'preset' : preset,
             'filename' : filename,
             'original_filename' : original_filename,
+            'extracted' : extracted,
         }
         self._file_mapping.update({filename : file_data})
 
         if path is not None:
             self.file_store.update({path:file_data})
 
-
     def download_files(self,files, title, default_ext=None):
         """ download_files: download list of files
             Args:
@@ -310,3 +326,58 @@ def download_files(self,files, title, default_ext=None):
             if result:
                 file_list += [result]
         return file_list
+
+    def derive_thumbnail(self, filepath, title):
+        """ derive_thumbnail: derive video's thumbnail
+            Args:
+                filepath (str): path to video file
+                title (str): name of node in case of error
+            Returns: result of download_file for the extracted image (False on failure)
+        """
+        with tempfile.TemporaryDirectory() as tempdir:  # removed on exit, so the extracted image is not leaked
+            thumbnail_path = os.path.join(tempdir, "thumbnail.{}".format(file_formats.PNG))
+            extract_thumbnail_from_video(filepath, thumbnail_path, overwrite=True)
+            return self.download_file(thumbnail_path, title, extracted=True, original_filepath=filepath + " (thumbnail)")
+
+    def compress_file(self, filepath, title):
+        """ compress_file: compress the video to a lower resolution
+            Args:
+                filepath (str): path to video file
+                title (str): name of node in case of error
+            Returns: file data for the compressed video (False if download_file fails)
+        """
+        # If file has already been compressed, return the compressed file data
+        if self.check_downloaded_file(filepath) and self.file_store[filepath].get('extracted'):
+            if config.VERBOSE:
+                sys.stderr.write("\n\tFound compressed file for {}".format(filepath))
+            return self.track_existing_file(filepath)
+
+        # Otherwise, compress into a scratch directory that is removed on exit
+        with tempfile.TemporaryDirectory() as tempdir:
+            compressed_path = os.path.join(tempdir, "compressed.{}".format(file_formats.MP4))
+            compress_video(filepath, compressed_path, overwrite=True)
+            return self.download_file(compressed_path, title, extracted=True, original_filepath=filepath)
+
+    def convert_base64_to_file(self, text, title, preset=None):
+        """ convert_base64_to_file: Writes base64 encoding to file
+            Args:
+                text (str): base64-encoded text to write out
+                title (str): name of node in case of error
+                preset (str): preset to assign to file (optional)
+            Returns: dict of file data (False if download_file fails)
+        """
+        # Get hash of content for tracking purposes
+        filepath = hashlib.md5(text.encode('utf-8')).hexdigest() + " (encoded)"
+
+        # If file has already been encoded, return the encoded file data
+        if self.check_downloaded_file(filepath):
+            if config.VERBOSE:
+                sys.stderr.write("\n\tFound encoded file for {}".format(filepath))
+            return self.track_existing_file(filepath)
+
+        if config.VERBOSE:
+            sys.stderr.write("\n\tConverting base64 to file")
+        with tempfile.TemporaryDirectory() as tempdir:  # removed on exit so the decoded image is not leaked
+            image_path = os.path.join(tempdir, "encoded.{}".format(file_formats.PNG))
+            write_base64_to_file(text, image_path)
+            return self.download_file(image_path, title, preset=preset, extracted=True, original_filepath=filepath)
diff --git a/ricecooker/managers/progress.py b/ricecooker/managers/progress.py
@@ -24,13 +24,14 @@ class Status(Enum):
     CONSTRUCT_CHANNEL = 1
     CREATE_TREE = 2
     DOWNLOAD_FILES = 3
-    GET_FILE_DIFF = 4
-    START_UPLOAD = 5
-    UPLOADING_FILES = 6
-    UPLOAD_CHANNEL = 7
-    PUBLISH_CHANNEL = 8
-    DONE = 9
-    LAST = 10
+    COMPRESS_FILES = 4
+    GET_FILE_DIFF = 5
+    START_UPLOAD = 6
+    UPLOADING_FILES = 7
+    UPLOAD_CHANNEL = 8
+    PUBLISH_CHANNEL = 9
+    DONE = 10
+    LAST = 11
 
 
 class RestoreManager:
@@ -171,6 +172,20 @@ def set_files(self, files_downloaded, file_mapping, files_failed):
                 files_failed ([str]): list of files that failed to download
             Returns: None
         """
+        self.status = Status.COMPRESS_FILES # Set status to next step
+        self.files_downloaded = files_downloaded
+        self.file_mapping = file_mapping
+        self.files_failed = files_failed
+        self.record_progress()
+
+    def set_compressed_files(self, files_downloaded, file_mapping, files_failed):
+        """ set_compressed_files: records progress from compressing files
+            Args:
+                files_downloaded ([str]): list of files that have been downloaded
+                file_mapping ({filename:...}): filenames mapped to metadata
+                files_failed ([str]): list of files that failed to download
+            Returns: None
+        """
         self.status = Status.GET_FILE_DIFF # Set status to next step
         self.files_downloaded = files_downloaded
         self.file_mapping = file_mapping