From 6650da0babcd0b74efff34d18aebd682b02ac5a6 Mon Sep 17 00:00:00 2001 From: Johan Walles Date: Sun, 14 Feb 2016 21:15:37 +0100 Subject: [PATCH] Set file timestamps on downloaded photos This change also adds unit tests and instructions on how to run them (just do `py.test`). The rationale for this change is that I wanted to copy photos and videos from Flickr into Google Photos. And for Google Photos to know when different photos have been taken, I'm guessing that it needs correctly timestamped files. To be able to get `py.test` to run the tests, the main script had to have a `.py` file extension, so I added that. This change has been tested on 33GB of photos and videos. --- .gitignore | 2 + README.md | 5 ++ __init__.py | 0 flickrmirrorer => flickrmirrorer.py | 76 +++++++++++++++++++++++++++-- tox.ini | 2 + 5 files changed, 81 insertions(+), 4 deletions(-) create mode 100644 __init__.py rename flickrmirrorer => flickrmirrorer.py (91%) diff --git a/.gitignore b/.gitignore index 47def24..2fdb8b4 100644 --- a/.gitignore +++ b/.gitignore @@ -1 +1,3 @@ /env/ +/.cache/ +*.pyc diff --git a/README.md b/README.md index d631a1b..2c34f2d 100644 --- a/README.md +++ b/README.md @@ -91,6 +91,11 @@ Errors are printed to stderr. To see more options run with the --help flag. +Running unit tests +================== +Run [`py.test`](http://pytest.org/). 
+ + TODO ==== * Mirror comments diff --git a/__init__.py b/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/flickrmirrorer b/flickrmirrorer.py similarity index 91% rename from flickrmirrorer rename to flickrmirrorer.py index 07d2f6f..ea7c3a2 100755 --- a/flickrmirrorer +++ b/flickrmirrorer.py @@ -46,6 +46,9 @@ import sys import webbrowser from six.moves import urllib +import dateutil.parser +import datetime +import time try: # We try importing simplejson first because it's faster than json @@ -123,6 +126,56 @@ def _validate_json_response(rsp): sys.exit(1) +def test_known_timestamp(): + timestamp = _get_timestamp({ + 'datetakenunknown': '0', + 'datetaken': '2015-11-02 12:35:07' + }) + assert timestamp.isoformat() == "2015-11-02T12:35:07" + + +def test_plain_title_timestamp(): + timestamp = _get_timestamp({ + 'datetakenunknown': '1', + 'datetaken': '2014-10-01 13:45:37', + 'title': '20151130_135610' + }) + assert timestamp.isoformat() == "2015-11-30T13:56:10" + + +def test_unparseable_title_timestamp(): + timestamp = _get_timestamp({ + 'datetakenunknown': '1', + 'datetaken': '2014-10-01 13:45:37', + 'title': 'flaskpost' + }) + + # Fall back on datetaken if we can't parse the date from the title + assert timestamp.isoformat() == "2014-10-01T13:45:37" + + +def _get_timestamp(photo): + """ + Return photo timestamp, get it from: + 1. datetaken unless datetakenunknown + 2. parse from photo title 'YYYYMMDD_HHmmss' + 3. 
datetaken anyway; it's available even if unknown, so we just go with + whatever Flickr made up for us + """ + if photo['datetakenunknown'] == "0": + return dateutil.parser.parse(photo['datetaken']) + + try: + parsed = datetime.datetime.strptime(photo['title'], '%Y%m%d_%H%M%S') + if parsed.year > 2000 and parsed < datetime.datetime.now(): + return parsed + except ValueError: + # Unable to parse photo title as datetime + pass + + return dateutil.parser.parse(photo['datetaken']) + + class FlickrMirrorer(object): dest_dir = None photostream_dir = None @@ -321,11 +374,13 @@ def _download_photo(self, photo): sys.stderr.write('Error: %s exists but is not a file. This is not allowed.\n' % metadata_filename) sys.exit(1) - # Check if we should fetch the image - if not os.path.exists(photo_filename) \ - or int(photo['lastupdate']) >= os.lstat(photo_filename).st_mtime: - # We don't have this photo or the version on the server is newer + # Download photo if photo doesn't exist, if metadata doesn't exist or if + # metadata has changed + should_download_photo = not os.path.exists(photo_filename) + should_download_photo |= not os.path.exists(metadata_filename) + should_download_photo |= self._is_file_different(metadata_filename, photo) + if should_download_photo: if not os.path.exists(photo_filename): self.new_photos += 1 else: @@ -355,6 +410,10 @@ def _download_photo(self, photo): 'Skipping metadata for %s because we already have it' % photo_basename) + timestamp = _get_timestamp(photo) + self._set_timestamp_if_changed(timestamp, photo_filename) + self._set_timestamp_if_changed(timestamp, metadata_filename) + return {photo_basename, metadata_basename} def _mirror_albums(self): @@ -566,6 +625,15 @@ def _is_file_different(filename, data): sys.exit(1) return True + def _set_timestamp_if_changed(self, timestamp, file): + stat0 = os.stat(file) + timestamp_since_epoch = time.mktime(timestamp.timetuple()) + os.utime(file, (timestamp_since_epoch, timestamp_since_epoch)) + + stat1 = 
os.stat(file) + if stat0.st_mtime != stat1.st_mtime: + self._verbose("%s: Re-timestamped to %s" % (os.path.basename(file), timestamp)) + def _write_json_if_changed(self, filename, data): """Write the given data to the specified filename, but only if it's different from what is currently there. Return true if the file was diff --git a/tox.ini b/tox.ini index 6deafc2..3dcba73 100644 --- a/tox.ini +++ b/tox.ini @@ -1,2 +1,4 @@ [flake8] max-line-length = 120 +[pytest] +python_files = flickrmirrorer.py