Skip to content

Commit

Permalink
Checksums: Don't reupload if size and checksum are unchanged #3235
Browse files Browse the repository at this point in the history
* Compute the content checksum (in addition to the optional
  transmission checksum) during upload (.eml files only)

* Add hook to compute and compare the checksum in csync_update

* Add content checksum to database, remove transmission checksum
  • Loading branch information
ckamm committed Nov 23, 2015
1 parent bdf830f commit a25f094
Show file tree
Hide file tree
Showing 23 changed files with 315 additions and 141 deletions.
1 change: 1 addition & 0 deletions csync/src/csync.c
Original file line number Diff line number Diff line change
Expand Up @@ -742,6 +742,7 @@ void csync_file_stat_free(csync_file_stat_t *st)
SAFE_FREE(st->directDownloadCookies);
SAFE_FREE(st->etag);
SAFE_FREE(st->destpath);
SAFE_FREE(st->checksum);
SAFE_FREE(st);
}
}
6 changes: 6 additions & 0 deletions csync/src/csync.h
Original file line number Diff line number Diff line change
Expand Up @@ -301,6 +301,12 @@ typedef void (*csync_vio_closedir_hook) (csync_vio_handle_t *dhhandle,
typedef int (*csync_vio_stat_hook) (csync_vio_handle_t *dhhandle,
void *userdata);

/* compute the checksum of the given \a checksumTypeId for \a path
* and return true if it's the same as \a checksum */
typedef bool (*csync_checksum_hook) (const char *path,
uint32_t checksumTypeId, const char *checksum,
void *userdata);

/**
* @brief Allocate a csync context.
*
Expand Down
8 changes: 8 additions & 0 deletions csync/src/csync_private.h
Original file line number Diff line number Diff line change
Expand Up @@ -96,6 +96,11 @@ struct csync_s {
csync_vio_readdir_hook remote_readdir_hook;
csync_vio_closedir_hook remote_closedir_hook;
void *vio_userdata;

/* hook for comparing checksums of files during discovery */
csync_checksum_hook checksum_hook;
void *checksum_userdata;

} callbacks;
c_strlist_t *excludes;

Expand Down Expand Up @@ -192,6 +197,9 @@ struct csync_file_stat_s {
char *directDownloadCookies;
char remotePerm[REMOTE_PERM_BUF_SIZE+1];

char *checksum;
uint32_t checksumTypeId;

CSYNC_STATUS error_status;

enum csync_instructions_e instruction; /* u32 */
Expand Down
15 changes: 11 additions & 4 deletions csync/src/csync_statedb.c
Original file line number Diff line number Diff line change
Expand Up @@ -226,6 +226,8 @@ int csync_statedb_close(CSYNC *ctx) {
return rc;
}

#define METADATA_COLUMNS "phash, pathlen, path, inode, uid, gid, mode, modtime, type, md5, fileid, remotePerm, filesize, ignoredChildrenRemote, contentChecksum, contentChecksumTypeId"

// This funciton parses a line from the metadata table into the given csync_file_stat
// structure which it is also allocating.
// Note that this function calls laso sqlite3_step to actually get the info from db and
Expand Down Expand Up @@ -286,6 +288,11 @@ static int _csync_file_stat_from_metadata_table( csync_file_stat_t **st, sqlite3
if(column_count > 13) {
(*st)->has_ignored_files = sqlite3_column_int(stmt, 13);
}
if(column_count > 15 && sqlite3_column_int(stmt, 15)) {
(*st)->checksum = c_strdup( (char*) sqlite3_column_text(stmt, 14));
(*st)->checksumTypeId = sqlite3_column_int(stmt, 15);
}

}
} else {
if( rc != SQLITE_DONE ) {
Expand All @@ -307,7 +314,7 @@ csync_file_stat_t *csync_statedb_get_stat_by_hash(CSYNC *ctx,
}

if( ctx->statedb.by_hash_stmt == NULL ) {
const char *hash_query = "SELECT * FROM metadata WHERE phash=?1";
const char *hash_query = "SELECT " METADATA_COLUMNS " FROM metadata WHERE phash=?1";

SQLITE_BUSY_HANDLED(sqlite3_prepare_v2(ctx->statedb.db, hash_query, strlen(hash_query), &ctx->statedb.by_hash_stmt, NULL));
ctx->statedb.lastReturnValue = rc;
Expand Down Expand Up @@ -350,7 +357,7 @@ csync_file_stat_t *csync_statedb_get_stat_by_file_id(CSYNC *ctx,
}

if( ctx->statedb.by_fileid_stmt == NULL ) {
const char *query = "SELECT * FROM metadata WHERE fileid=?1";
const char *query = "SELECT " METADATA_COLUMNS " FROM metadata WHERE fileid=?1";

SQLITE_BUSY_HANDLED(sqlite3_prepare_v2(ctx->statedb.db, query, strlen(query), &ctx->statedb.by_fileid_stmt, NULL));
ctx->statedb.lastReturnValue = rc;
Expand Down Expand Up @@ -390,7 +397,7 @@ csync_file_stat_t *csync_statedb_get_stat_by_inode(CSYNC *ctx,
}

if( ctx->statedb.by_inode_stmt == NULL ) {
const char *inode_query = "SELECT * FROM metadata WHERE inode=?1";
const char *inode_query = "SELECT " METADATA_COLUMNS " FROM metadata WHERE inode=?1";

SQLITE_BUSY_HANDLED(sqlite3_prepare_v2(ctx->statedb.db, inode_query, strlen(inode_query), &ctx->statedb.by_inode_stmt, NULL));
ctx->statedb.lastReturnValue = rc;
Expand Down Expand Up @@ -433,7 +440,7 @@ int csync_statedb_get_below_path( CSYNC *ctx, const char *path ) {
* In other words, anything that is between path+'/' and path+'0',
* (because '0' follows '/' in ascii)
*/
const char *below_path_query = "SELECT phash, pathlen, path, inode, uid, gid, mode, modtime, type, md5, fileid, remotePerm, filesize, ignoredChildrenRemote FROM metadata WHERE path > (?||'/') AND path < (?||'0')";
const char *below_path_query = "SELECT " METADATA_COLUMNS " FROM metadata WHERE path > (?||'/') AND path < (?||'0')";
SQLITE_BUSY_HANDLED(sqlite3_prepare_v2(ctx->statedb.db, below_path_query, -1, &stmt, NULL));
ctx->statedb.lastReturnValue = rc;
if( rc != SQLITE_OK ) {
Expand Down
52 changes: 35 additions & 17 deletions csync/src/csync_update.c
Original file line number Diff line number Diff line change
Expand Up @@ -270,25 +270,43 @@ static int _csync_detect_update(CSYNC *ctx, const char *file,
((int64_t) fs->mtime), ((int64_t) tmp->modtime),
fs->etag, tmp->etag, (uint64_t) fs->inode, (uint64_t) tmp->inode,
(uint64_t) fs->size, (uint64_t) tmp->size, fs->remotePerm, tmp->remotePerm, tmp->has_ignored_files );
if((ctx->current == REMOTE_REPLICA && !c_streq(fs->etag, tmp->etag ))
|| (ctx->current == LOCAL_REPLICA && (!_csync_mtime_equal(fs->mtime, tmp->modtime)
// zero size in statedb can happen during migration
|| (tmp->size != 0 && fs->size != tmp->size)
if (ctx->current == REMOTE_REPLICA && !c_streq(fs->etag, tmp->etag)) {
st->instruction = CSYNC_INSTRUCTION_EVAL;
goto out;
}
if (ctx->current == LOCAL_REPLICA &&
(!_csync_mtime_equal(fs->mtime, tmp->modtime)
// zero size in statedb can happen during migration
|| (tmp->size != 0 && fs->size != tmp->size)
#if 0
|| fs->inode != tmp->inode
/* Comparison of the local inode is disabled because people reported problems
* on windows with flacky inode values, see github bug #779
*
* The inode needs to be observed because:
* $> echo a > a.txt ; echo b > b.txt
* both files have the same mtime
* sync them.
* $> rm a.txt && mv b.txt a.txt
* makes b.txt appearing as a.txt yet a sync is not performed because
* both have the same modtime as mv does not change that.
*/
|| fs->inode != tmp->inode
#endif
))) {
/* Comparison of the local inode is disabled because people reported problems
* on windows with flacky inode values, see github bug #779
*
* The inode needs to be observed because:
* $> echo a > a.txt ; echo b > b.txt
* both files have the same mtime
* sync them.
* $> rm a.txt && mv b.txt a.txt
* makes b.txt appearing as a.txt yet a sync is not performed because
* both have the same modtime as mv does not change that.
*/
)) {

if (fs->size == tmp->size && tmp->checksumTypeId) {
bool checksumIdentical = false;
if (ctx->callbacks.checksum_hook) {
checksumIdentical = ctx->callbacks.checksum_hook(
file, tmp->checksumTypeId, tmp->checksum,
ctx->callbacks.checksum_userdata);
}
if (checksumIdentical) {
st->instruction = CSYNC_INSTRUCTION_NONE;
st->should_update_metadata = true;
goto out;
}
}
st->instruction = CSYNC_INSTRUCTION_EVAL;
goto out;
}
Expand Down
6 changes: 6 additions & 0 deletions csync/tests/csync_tests/check_csync_update.c
Original file line number Diff line number Diff line change
Expand Up @@ -41,6 +41,12 @@ static void statedb_create_metadata_table(sqlite3 *db)
"modtime INTEGER(8),"
"type INTEGER,"
"md5 VARCHAR(32),"
"fileid VARCHAR(128),"
"remotePerm VARCHAR(128),"
"filesize BIGINT,"
"ignoredChildrenRemote INT,"
"contentChecksum TEXT,"
"contentChecksumTypeId INTEGER,"
"PRIMARY KEY(phash));";

rc = sqlite3_exec(db, sql, NULL, NULL, NULL);
Expand Down
14 changes: 3 additions & 11 deletions src/libsync/propagatedownload.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -351,10 +351,7 @@ void PropagateDownloadFileQNAM::start()
if (_resumeStart == _item->_size) {
qDebug() << "File is already complete, no need to download";
_tmpFile.close();

// Unfortunately we lost the checksum header, if any...
QByteArray noChecksumData;
downloadFinished(noChecksumData, noChecksumData);
downloadFinished();
return;
}
}
Expand Down Expand Up @@ -537,7 +534,7 @@ void PropagateDownloadFileQNAM::slotGetFinished()
// as this is (still) also correct.
ValidateChecksumHeader *validator = new ValidateChecksumHeader(this);
connect(validator, SIGNAL(validated(QByteArray,QByteArray)),
SLOT(downloadFinished(QByteArray,QByteArray)));
SLOT(downloadFinished()));
connect(validator, SIGNAL(validationFailed(QString)),
SLOT(slotChecksumFail(QString)));
auto checksumHeader = job->reply()->rawHeader(checkSumHeaderC);
Expand Down Expand Up @@ -621,13 +618,8 @@ static void handleRecallFile(const QString &fn)
}
} // end namespace

void PropagateDownloadFileQNAM::downloadFinished(const QByteArray& checksumType, const QByteArray& checksum)
void PropagateDownloadFileQNAM::downloadFinished()
{
if (!checksumType.isEmpty()) {
_item->_transmissionChecksum = checksum;
_item->_transmissionChecksumType = checksumType;
}

QString fn = _propagator->getFilePath(_item->_file);

// In case of file name clash, report an error
Expand Down
2 changes: 1 addition & 1 deletion src/libsync/propagatedownload.h
Original file line number Diff line number Diff line change
Expand Up @@ -117,7 +117,7 @@ class PropagateDownloadFileQNAM : public PropagateItemJob {
private slots:
void slotGetFinished();
void abort() Q_DECL_OVERRIDE;
void downloadFinished(const QByteArray& checksumType, const QByteArray& checksum);
void downloadFinished();
void slotDownloadProgress(qint64,qint64);
void slotChecksumFail( const QString& errMsg );

Expand Down
4 changes: 2 additions & 2 deletions src/libsync/propagateremotemove.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -158,8 +158,8 @@ void PropagateRemoteMove::finalize()

SyncJournalFileRecord record(*_item, _propagator->getFilePath(_item->_renameTarget));
record._path = _item->_renameTarget;
record._transmissionChecksum = oldRecord._transmissionChecksum;
record._transmissionChecksumType = oldRecord._transmissionChecksumType;
record._contentChecksum = oldRecord._contentChecksum;
record._contentChecksumType = oldRecord._contentChecksumType;

_propagator->_journal->setFileRecord(record);
_propagator->_journal->commit("Remote Rename");
Expand Down
70 changes: 41 additions & 29 deletions src/libsync/propagateupload.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -216,24 +216,38 @@ void PropagateUploadFileQNAM::start()

_stopWatch.start();

auto supportedChecksumTypes = _propagator->account()->capabilities().supportedChecksumTypes();

// If we already have a checksum header and the checksum type is supported
// by the server, we keep that - otherwise recompute.
//
// Note: Currently we *always* recompute because we usually only upload
// files that have changed and thus have a new checksum. But if an earlier
// phase computed a checksum, this is where we would make use of it.
if (!_item->_transmissionChecksumType.isEmpty()) {
if (supportedChecksumTypes.contains(_item->_transmissionChecksumType)) {
// TODO: We could validate the old checksum and thereby determine whether
// an upload is necessary or not.
slotStartUpload(_item->_transmissionChecksumType, _item->_transmissionChecksum);
return;
}
// Compute the content checksum.
auto computeChecksum = new ComputeChecksum(this);
// We currently only do content checksums for the particular .eml case
// This should be done more generally in the future!
if (filePath.endsWith(QLatin1String(".eml"), Qt::CaseInsensitive)) {
computeChecksum->setChecksumType("MD5");
} else {
computeChecksum->setChecksumType(QByteArray());
}

connect(computeChecksum, SIGNAL(done(QByteArray,QByteArray)),
SLOT(slotComputeTransmissionChecksum(QByteArray,QByteArray)));
computeChecksum->start(filePath);
}

void PropagateUploadFileQNAM::slotComputeTransmissionChecksum(const QByteArray& contentChecksumType, const QByteArray& contentChecksum)
{
_item->_contentChecksum = contentChecksum;
_item->_contentChecksumType = contentChecksumType;

_stopWatch.addLapTime(QLatin1String("ContentChecksum"));
_stopWatch.start();

// Reuse the content checksum as the transmission checksum if possible
const auto supportedTransmissionChecksums =
_propagator->account()->capabilities().supportedChecksumTypes();
if (supportedTransmissionChecksums.contains(contentChecksumType)) {
slotStartUpload(contentChecksumType, contentChecksum);
return;
}

// Compute a new checksum.
// Compute the transmission checksum.
auto computeChecksum = new ComputeChecksum(this);
if (uploadChecksumEnabled()) {
computeChecksum->setChecksumType(_propagator->account()->capabilities().preferredChecksumType());
Expand All @@ -243,27 +257,22 @@ void PropagateUploadFileQNAM::start()

connect(computeChecksum, SIGNAL(done(QByteArray,QByteArray)),
SLOT(slotStartUpload(QByteArray,QByteArray)));
const QString filePath = _propagator->getFilePath(_item->_file);
computeChecksum->start(filePath);
}

void PropagateUploadFileQNAM::slotStartUpload(const QByteArray& checksumType, const QByteArray& checksum)
void PropagateUploadFileQNAM::slotStartUpload(const QByteArray& transmissionChecksumType, const QByteArray& transmissionChecksum)
{
// Store the computed checksum in the database, if different
if (checksumType != _item->_transmissionChecksumType
|| checksum != _item->_transmissionChecksum) {
_item->_transmissionChecksum = checksum;
_item->_transmissionChecksumType = checksumType;
_propagator->_journal->updateFileRecordChecksum(
_item->_file, checksum, checksumType);
}
_transmissionChecksum = transmissionChecksum;
_transmissionChecksumType = transmissionChecksumType;

const QString fullFilePath = _propagator->getFilePath(_item->_file);

if (!FileSystem::fileExists(fullFilePath)) {
done(SyncFileItem::SoftError, tr("File Removed"));
return;
}
_stopWatch.addLapTime(QLatin1String("Checksum"));
_stopWatch.addLapTime(QLatin1String("TransmissionChecksum"));

time_t prevModtime = _item->_modtime; // the _item value was set in PropagateUploadFileQNAM::start()
// but a potential checksum calculation could have taken some time during which the file could
Expand Down Expand Up @@ -511,9 +520,9 @@ void PropagateUploadFileQNAM::startNextChunk()
isFinalChunk = true;
}

if (isFinalChunk && !_item->_transmissionChecksumType.isEmpty()) {
if (isFinalChunk && !_transmissionChecksumType.isEmpty()) {
headers[checkSumHeaderC] = makeChecksumHeader(
_item->_transmissionChecksumType, _item->_transmissionChecksum);
_transmissionChecksumType, _transmissionChecksum);
}

if (! device->prepareAndOpen(_propagator->getFilePath(_item->_file), chunkStart, currentChunkSize)) {
Expand Down Expand Up @@ -724,7 +733,10 @@ void PropagateUploadFileQNAM::slotPutFinished()

// performance logging
_item->_requestDuration = _stopWatch.stop();
qDebug() << "*==* duration UPLOAD" << _item->_size << _stopWatch.durationOfLap(QLatin1String("Checksum")) << _item->_requestDuration;
qDebug() << "*==* duration UPLOAD" << _item->_size
<< _stopWatch.durationOfLap(QLatin1String("ContentChecksum"))
<< _stopWatch.durationOfLap(QLatin1String("TransmissionChecksum"))
<< _item->_requestDuration;

finalize(*_item);
}
Expand Down
6 changes: 5 additions & 1 deletion src/libsync/propagateupload.h
Original file line number Diff line number Diff line change
Expand Up @@ -183,6 +183,9 @@ class PropagateUploadFileQNAM : public PropagateItemJob {
// measure the performance of checksum calc and upload
Utility::StopWatch _stopWatch;

QByteArray _transmissionChecksum;
QByteArray _transmissionChecksumType;

public:
PropagateUploadFileQNAM(OwncloudPropagator* propagator,const SyncFileItemPtr& item)
: PropagateItemJob(propagator, item), _startChunk(0), _currentChunk(0), _chunkCount(0), _transferId(0), _finished(false) {}
Expand All @@ -195,7 +198,8 @@ private slots:
void startNextChunk();
void finalize(const SyncFileItem&);
void slotJobDestroyed(QObject *job);
void slotStartUpload(const QByteArray& checksumType, const QByteArray& checksum);
void slotStartUpload(const QByteArray& transmissionChecksumType, const QByteArray& transmissionChecksum);
void slotComputeTransmissionChecksum(const QByteArray& contentChecksumType, const QByteArray& contentChecksum);

private:
void startPollJob(const QString& path);
Expand Down
4 changes: 2 additions & 2 deletions src/libsync/propagatorjobs.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -209,8 +209,8 @@ void PropagateLocalRename::start()

SyncJournalFileRecord record(*_item, targetFile);
record._path = _item->_renameTarget;
record._transmissionChecksum = oldRecord._transmissionChecksum;
record._transmissionChecksumType = oldRecord._transmissionChecksumType;
record._contentChecksum = oldRecord._contentChecksum;
record._contentChecksumType = oldRecord._contentChecksumType;

if (!_item->_isDirectory) { // Directories are saved at the end
_propagator->_journal->setFileRecord(record);
Expand Down
Loading

0 comments on commit a25f094

Please sign in to comment.